diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-03 05:11:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-03 05:11:10 +0000 |
commit | cff6d757e3ba609c08ef2aaa00f07e53551e5bf6 (patch) | |
tree | 08c4fc3255483ad397d712edb4214ded49149fd9 /src | |
parent | Adding upstream version 2.9.7. (diff) | |
download | haproxy-upstream/3.0.0.tar.xz haproxy-upstream/3.0.0.zip |
Adding upstream version 3.0.0. (upstream/3.0.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src')
122 files changed, 20323 insertions, 11993 deletions
@@ -546,6 +546,25 @@ struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list * */ if (!pat_ref_add(ref, arg, NULL, err)) goto out_free_expr; + + if (global.mode & MODE_DIAG) { + if (strcmp(arg, "&&") == 0 || strcmp(arg, "and") == 0 || + strcmp(arg, "||") == 0 || strcmp(arg, "or") == 0) + ha_diag_warning("parsing [%s:%d] : pattern '%s' looks like a failed attempt at using an operator inside a pattern list\n", file, line, arg); + else if (strcmp(arg, "#") == 0 || strcmp(arg, "//") == 0) + ha_diag_warning("parsing [%s:%d] : pattern '%s' looks like a failed attempt at commenting an end of line\n", file, line, arg); + else if (find_acl_kw(arg)) + ha_diag_warning("parsing [%s:%d] : pattern '%s' suspiciously looks like a known acl keyword\n", file, line, arg); + else { + const char *begw = arg, *endw; + + for (endw = begw; is_idchar(*endw); endw++) + ; + + if (endw != begw && find_sample_fetch(begw, endw - begw)) + ha_diag_warning("parsing [%s:%d] : pattern '%s' suspiciously looks like a known sample fetch keyword\n", file, line, arg); + } + } args++; } @@ -1331,7 +1350,11 @@ int smp_fetch_acl_parse(struct arg *args, char **err_msg) name++; } - if (!(acl_sample->terms[i].acl = find_acl_by_name(name, &curproxy->acl))) { + + if ( + !(acl_sample->terms[i].acl = find_acl_by_name(name, &curproxy->acl)) && + !(acl_sample->terms[i].acl = find_acl_default(name, &curproxy->acl, err_msg, NULL, NULL, 0)) + ) { memprintf(err_msg, "ACL '%s' not found", name); goto err; } diff --git a/src/activity.c b/src/activity.c index 07a30e6..5417deb 100644 --- a/src/activity.c +++ b/src/activity.c @@ -647,17 +647,12 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) unsigned long long tot_alloc_calls, tot_free_calls; unsigned long long tot_alloc_bytes, tot_free_bytes; #endif - struct stconn *sc = appctx_sc(appctx); struct buffer *name_buffer = get_trash_chunk(); const struct ha_caller *caller; const char *str; int max_lines; int i, j, max; - /* FIXME: Don't 
watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); switch (profiling & HA_PROF_TASKS_MASK) { @@ -808,8 +803,14 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) else chunk_appendf(&trash, "[other]"); - chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], - (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls)); + if ((tmp_memstats[i].method != MEMPROF_METH_P_ALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_MALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_CALLOC)) { + chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], + (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls)); + } else + chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], + (long long)(entry->alloc_tot) / (long long)(entry->alloc_calls)); if (entry->alloc_tot && entry->free_tot) { /* that's a realloc, show the total diff to help spot leaks */ @@ -834,9 +835,13 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) tot_alloc_calls = tot_free_calls = tot_alloc_bytes = tot_free_bytes = 0; for (i = 0; i < max_lines; i++) { tot_alloc_calls += tmp_memstats[i].alloc_calls; - tot_free_calls += tmp_memstats[i].free_calls; tot_alloc_bytes += tmp_memstats[i].alloc_tot; - tot_free_bytes += tmp_memstats[i].free_tot; + if ((tmp_memstats[i].method != MEMPROF_METH_P_ALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_MALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_CALLOC)) { + tot_free_calls += tmp_memstats[i].free_calls; + tot_free_bytes += tmp_memstats[i].free_tot; + } } chunk_appendf(&trash, @@ -911,7 +916,6 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a static int cli_io_handler_show_tasks(struct appctx *appctx) { struct sched_activity tmp_activity[SCHED_ACT_HASH_BUCKETS] __attribute__((aligned(64))); - struct stconn *sc = 
appctx_sc(appctx); struct buffer *name_buffer = get_trash_chunk(); struct sched_activity *entry; const struct tasklet *tl; @@ -922,10 +926,6 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) int thr, queue; int i, max; - /* FIXME: Don't watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - /* It's not possible to scan queues in small chunks and yield in the * middle of the dump and come back again. So what we're doing instead * is to freeze all threads and inspect their queues at once as fast as @@ -1057,17 +1057,12 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) */ static int cli_io_handler_show_activity(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct show_activity_ctx *actctx = appctx->svcctx; int tgt = actctx->thr; // target thread, -1 for all, 0 for total only uint up_sec, up_usec; int base_line; ullong up; - /* FIXME: Don't watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - /* this macro is used below to dump values. The thread number is "thr", * and runs from 0 to nbt-1 when values are printed using the formula. 
* We normally try to dmup integral lines in order to keep counters diff --git a/src/applet.c b/src/applet.c index b695a9f..c528963 100644 --- a/src/applet.c +++ b/src/applet.c @@ -15,13 +15,17 @@ #include <haproxy/api.h> #include <haproxy/applet.h> +#include <haproxy/cfgparse.h> #include <haproxy/channel.h> +#include <haproxy/htx.h> #include <haproxy/list.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/task.h> #include <haproxy/trace.h> +#include <haproxy/vecpair.h> +#include <haproxy/xref.h> unsigned int nb_applets = 0; @@ -50,6 +54,14 @@ static const struct trace_event applet_trace_events[] = { { .mask = APPLET_EV_ERR, .name = "app_err", .desc = "error on appctx" }, #define APPLET_EV_START (1ULL << 5) { .mask = APPLET_EV_START, .name = "app_start", .desc = "start appctx" }, +#define APPLET_EV_RECV (1ULL << 6) + { .mask = APPLET_EV_START, .name = "app_receive", .desc = "RX on appctx" }, +#define APPLET_EV_SEND (1ULL << 7) + { .mask = APPLET_EV_START, .name = "app_send", .desc = "TX on appctx" }, +#define APPLET_EV_BLK (1ULL << 8) + { .mask = APPLET_EV_START, .name = "app_blk", .desc = "appctx blocked" }, +#define APPLET_EV_WAKE (1ULL << 9) + { .mask = APPLET_EV_START, .name = "app_wake", .desc = "appctx woken up" }, {} }; @@ -129,9 +141,9 @@ static void applet_trace(enum trace_level level, uint64_t mask, const struct tra if (src->verbosity == STRM_VERB_CLEAN) return; - chunk_appendf(&trace_buf, " appctx=%p .t=%p .t.exp=%d .state=%d .st0=%d .st1=%d", + chunk_appendf(&trace_buf, " appctx=%p .t=%p .t.exp=%d .flags=0x%x .st0=%d .st1=%d to_fwd=%lu", appctx, appctx->t, tick_isset(appctx->t->expire) ? 
TICKS_TO_MS(appctx->t->expire - now_ms) : TICK_ETERNITY, - appctx->state, appctx->st0, appctx->st1); + appctx->flags, appctx->st0, appctx->st1, (ulong)appctx->to_forward); if (!sc || src->verbosity == STRM_VERB_MINIMAL) return; @@ -167,21 +179,41 @@ static void applet_trace(enum trace_level level, uint64_t mask, const struct tra (src->verbosity == STRM_VERB_ADVANCED && src->level < TRACE_LEVEL_DATA)) return; - /* channels' buffer info */ - if (s->flags & SF_HTX) { - struct htx *ichtx = htxbuf(&ic->buf); - struct htx *ochtx = htxbuf(&oc->buf); + if (appctx->t->process == task_run_applet) { + /* channels' buffer info */ + if (s->flags & SF_HTX) { + struct htx *ichtx = htxbuf(&ic->buf); + struct htx *ochtx = htxbuf(&oc->buf); - chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", - ichtx->data, ichtx->size, htx_nbblks(ichtx), - ochtx->data, ochtx->size, htx_nbblks(ochtx)); + chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", + ichtx->data, ichtx->size, htx_nbblks(ichtx), + ochtx->data, ochtx->size, htx_nbblks(ochtx)); + } + else { + chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", + (unsigned int)b_data(&ic->buf), b_orig(&ic->buf), + (unsigned int)b_head_ofs(&ic->buf), (unsigned int)b_size(&ic->buf), + (unsigned int)b_data(&oc->buf), b_orig(&oc->buf), + (unsigned int)b_head_ofs(&oc->buf), (unsigned int)b_size(&oc->buf)); + } } else { - chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", - (unsigned int)b_data(&ic->buf), b_orig(&ic->buf), - (unsigned int)b_head_ofs(&ic->buf), (unsigned int)b_size(&ic->buf), - (unsigned int)b_data(&oc->buf), b_orig(&oc->buf), - (unsigned int)b_head_ofs(&oc->buf), (unsigned int)b_size(&oc->buf)); + /* RX/TX buffer info */ + if (s->flags & SF_HTX) { + struct htx *rxhtx = htxbuf(&appctx->inbuf); + struct htx *txhtx = htxbuf(&appctx->outbuf); + + chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", + rxhtx->data, rxhtx->size, htx_nbblks(rxhtx), + txhtx->data, txhtx->size, htx_nbblks(txhtx)); + } + else { + 
chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", + (unsigned int)b_data(&appctx->inbuf), b_orig(&appctx->inbuf), + (unsigned int)b_head_ofs(&appctx->inbuf), (unsigned int)b_size(&appctx->inbuf), + (unsigned int)b_data(&appctx->outbuf), b_orig(&appctx->outbuf), + (unsigned int)b_head_ofs(&appctx->outbuf), (unsigned int)b_size(&appctx->outbuf)); + } } } @@ -207,7 +239,7 @@ struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int t goto fail_appctx; } - LIST_INIT(&appctx->wait_entry); + MT_LIST_INIT(&appctx->wait_entry); appctx->obj_type = OBJ_TYPE_APPCTX; appctx->applet = applet; appctx->sess = NULL; @@ -229,7 +261,18 @@ struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int t } appctx->sedesc = sedesc; - appctx->t->process = task_run_applet; + + appctx->flags = 0; + appctx->inbuf = BUF_NULL; + appctx->outbuf = BUF_NULL; + appctx->to_forward = 0; + + if (applet->rcv_buf != NULL && applet->snd_buf != NULL) { + appctx->t->process = task_process_applet; + applet_fl_set(appctx, APPCTX_FL_INOUT_BUFS); + } + else + appctx->t->process = task_run_applet; appctx->t->context = appctx; LIST_INIT(&appctx->buffer_wait.list); @@ -314,7 +357,7 @@ void appctx_free(struct appctx *appctx) /* if it's running, or about to run, defer the freeing * until the callback is called. */ - appctx->state |= APPLET_WANT_DIE; + applet_fl_set(appctx, APPCTX_FL_WANT_DIE); task_wakeup(appctx->t, TASK_WOKEN_OTHER); TRACE_DEVEL("Cannot release APPCTX now, wake it up", APPLET_EV_FREE, appctx); } @@ -348,55 +391,366 @@ void applet_reset_svcctx(struct appctx *appctx) appctx->svcctx = NULL; } -/* call the applet's release() function if any, and marks the sedesc as shut. - * Needs to be called upon close(). +/* call the applet's release() function if any, and marks the sedesc as shut + * once both read and write side are shut. Needs to be called upon close(). 
*/ void appctx_shut(struct appctx *appctx) { - if (se_fl_test(appctx->sedesc, SE_FL_SHR | SE_FL_SHW)) + if (applet_fl_test(appctx, APPCTX_FL_SHUTDOWN)) return; TRACE_ENTER(APPLET_EV_RELEASE, appctx); + if (appctx->applet->release) appctx->applet->release(appctx); + applet_fl_set(appctx, APPCTX_FL_SHUTDOWN); - if (LIST_INLIST(&appctx->buffer_wait.list)) - LIST_DEL_INIT(&appctx->buffer_wait.list); + b_dequeue(&appctx->buffer_wait); - se_fl_set(appctx->sedesc, SE_FL_SHRR | SE_FL_SHWN); TRACE_LEAVE(APPLET_EV_RELEASE, appctx); } +/* releases unused buffers after processing. It will try to wake up as many + * entities as the number of buffers that it releases. + */ +static void appctx_release_buffers(struct appctx * appctx) +{ + int offer = 0; + + if (b_size(&appctx->inbuf) && !b_data(&appctx->inbuf)) { + offer++; + b_free(&appctx->inbuf); + } + if (b_size(&appctx->outbuf) && !b_data(&appctx->outbuf)) { + offer++; + b_free(&appctx->outbuf); + } + + /* if we're certain to have at least 1 buffer available, and there is + * someone waiting, we can wake up a waiter and offer them. + */ + if (offer) + offer_buffers(appctx, offer); +} + /* Callback used to wake up an applet when a buffer is available. The applet * <appctx> is woken up if an input buffer was requested for the associated - * stream connector. In this case the buffer is immediately allocated and the - * function returns 1. Otherwise it returns 0. Note that this automatically - * covers multiple wake-up attempts by ensuring that the same buffer will not - * be accounted for multiple times. + * stream connector. In this case the buffer is expected to be allocated later, + * the applet is woken up, and the function returns 1 to mention this buffer is + * expected to be used. Otherwise it returns 0. 
*/ int appctx_buf_available(void *arg) { struct appctx *appctx = arg; struct stconn *sc = appctx_sc(appctx); + int ret = 0; + + if (applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC)) { + applet_fl_clr(appctx, APPCTX_FL_INBLK_ALLOC); + applet_fl_set(appctx, APPCTX_FL_IN_MAYALLOC); + TRACE_STATE("unblocking appctx on inbuf allocation", APPLET_EV_RECV|APPLET_EV_BLK|APPLET_EV_WAKE, appctx); + ret = 1; + } + + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC)) { + applet_fl_clr(appctx, APPCTX_FL_OUTBLK_ALLOC); + applet_fl_set(appctx, APPCTX_FL_OUT_MAYALLOC); + TRACE_STATE("unblocking appctx on outbuf allocation", APPLET_EV_SEND|APPLET_EV_BLK|APPLET_EV_WAKE, appctx); + ret = 1; + } + + /* allocation requested ? if no, give up. */ + if (sc->flags & SC_FL_NEED_BUFF) { + sc_have_buff(sc); + ret = 1; + } + + /* The requested buffer might already have been allocated (channel, + * fast-forward etc), in which case we won't need to take that one. + * Otherwise we expect to take it. + */ + if (!c_size(sc_ic(sc)) && !sc_ep_have_ff_data(sc_opposite(sc))) + ret = 1; + leave: + if (ret) + task_wakeup(appctx->t, TASK_WOKEN_RES); + return ret; +} + +size_t appctx_htx_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct htx *appctx_htx = htx_from_buf(&appctx->outbuf); + struct htx *buf_htx = NULL; + size_t ret = 0; + + if (htx_is_empty(appctx_htx)) { + htx_to_buf(appctx_htx, &appctx->outbuf); + goto out; + } + + ret = appctx_htx->data; + buf_htx = htx_from_buf(buf); + if (htx_is_empty(buf_htx) && htx_used_space(appctx_htx) <= count) { + htx_to_buf(buf_htx, buf); + htx_to_buf(appctx_htx, &appctx->outbuf); + b_xfer(buf, &appctx->outbuf, b_data(&appctx->outbuf)); + goto out; + } + + htx_xfer_blks(buf_htx, appctx_htx, count, HTX_BLK_UNUSED); + buf_htx->flags |= (appctx_htx->flags & (HTX_FL_PARSING_ERROR|HTX_FL_PROCESSING_ERROR)); + if (htx_is_empty(appctx_htx)) { + buf_htx->flags |= (appctx_htx->flags & HTX_FL_EOM); + } + buf_htx->extra = 
(appctx_htx->extra ? (appctx_htx->data + appctx_htx->extra) : 0); + htx_to_buf(buf_htx, buf); + htx_to_buf(appctx_htx, &appctx->outbuf); + ret -= appctx_htx->data; + + out: + return ret; +} + +size_t appctx_raw_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + return b_xfer(buf, &appctx->outbuf, MIN(count, b_data(&appctx->outbuf))); +} + +size_t appctx_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + size_t ret = 0; + + TRACE_ENTER(APPLET_EV_RECV, appctx); + + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC)) + goto end; + + if (!count) + goto end; + + if (!appctx_get_buf(appctx, &appctx->outbuf)) { + TRACE_STATE("waiting for appctx outbuf allocation", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + goto end; + } + + if (flags & CO_RFL_BUF_FLUSH) + applet_fl_set(appctx, APPCTX_FL_FASTFWD); + + ret = appctx->applet->rcv_buf(appctx, buf, count, flags); + if (ret) + applet_fl_clr(appctx, APPCTX_FL_OUTBLK_FULL); + + if (b_data(&appctx->outbuf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + else { + se_fl_clr(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + if (applet_fl_test(appctx, APPCTX_FL_EOI)) { + se_fl_set(appctx->sedesc, SE_FL_EOI); + TRACE_STATE("report EOI to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_EOS)) { + se_fl_set(appctx->sedesc, SE_FL_EOS); + TRACE_STATE("report EOS to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_ERROR)) { + se_fl_set(appctx->sedesc, SE_FL_ERROR); + TRACE_STATE("report ERROR to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + } + + end: + TRACE_LEAVE(APPLET_EV_RECV, appctx); + return ret; +} + +size_t appctx_htx_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct htx *appctx_htx = 
htx_from_buf(&appctx->inbuf); + struct htx *buf_htx = htx_from_buf(buf); + size_t ret = 0; + + ret = buf_htx->data; + if (htx_is_empty(appctx_htx) && buf_htx->data == count) { + htx_to_buf(appctx_htx, &appctx->inbuf); + htx_to_buf(buf_htx, buf); + b_xfer(&appctx->inbuf, buf, b_data(buf)); + goto end; + } + + htx_xfer_blks(appctx_htx, buf_htx, count, HTX_BLK_UNUSED); + if (htx_is_empty(buf_htx)) { + appctx_htx->flags |= (buf_htx->flags & HTX_FL_EOM); + } + + appctx_htx->extra = (buf_htx->extra ? (buf_htx->data + buf_htx->extra) : 0); + htx_to_buf(appctx_htx, &appctx->outbuf); + htx_to_buf(buf_htx, buf); + ret -= buf_htx->data; +end: + if (ret < count) { + applet_fl_set(appctx, APPCTX_FL_INBLK_FULL); + TRACE_STATE("report appctx inbuf is full", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + } + return ret; +} + +size_t appctx_raw_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned flags) +{ + size_t ret = 0; + + ret = b_xfer(&appctx->inbuf, buf, MIN(b_room(&appctx->inbuf), count)); + if (ret < count) { + applet_fl_set(appctx, APPCTX_FL_INBLK_FULL); + TRACE_STATE("report appctx inbuf is full", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + } + end: + return ret; +} + +size_t appctx_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + size_t ret = 0; + + TRACE_ENTER(APPLET_EV_SEND, appctx); + + if (applet_fl_test(appctx, (APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING))) + goto end; + + if (applet_fl_test(appctx, (APPCTX_FL_INBLK_FULL|APPCTX_FL_INBLK_ALLOC))) + goto end; + + if (!count) + goto end; + + if (!appctx_get_buf(appctx, &appctx->inbuf)) { + TRACE_STATE("waiting for appctx inbuf allocation", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + goto end; + } + + ret = appctx->applet->snd_buf(appctx, buf, count, flags); + + end: + if (applet_fl_test(appctx, (APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING))) { + BUG_ON((applet_fl_get(appctx) & (APPCTX_FL_EOS|APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING)) == 
(APPCTX_FL_EOS|APPCTX_FL_ERR_PENDING)); + applet_set_error(appctx); + TRACE_STATE("report ERR_PENDING/ERROR to SE", APPLET_EV_SEND, appctx); + } + TRACE_LEAVE(APPLET_EV_SEND, appctx); + return ret; +} + +int appctx_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + struct xref *peer; + struct sedesc *sdo = NULL; + unsigned int len, nego_flags = NEGO_FF_FL_NONE; + int ret = 0; + + TRACE_ENTER(APPLET_EV_RECV, appctx); + + applet_fl_set(appctx, APPCTX_FL_FASTFWD); + + /* TODO: outbuf must be empty. Find a better way to handle that but for now just return -1 */ + if (b_data(&appctx->outbuf)) { + TRACE_STATE("Output buffer not empty, cannot fast-forward data", APPLET_EV_RECV, appctx); + return -1; + } + + peer = xref_get_peer_and_lock(&appctx->sedesc->xref); + if (!peer) { + TRACE_STATE("Opposite endpoint not available yet", APPLET_EV_RECV, appctx); + goto end; + } + sdo = container_of(peer, struct sedesc, xref); + xref_unlock(&appctx->sedesc->xref, peer); + + if (appctx->to_forward && count > appctx->to_forward) { + count = appctx->to_forward; + nego_flags |= NEGO_FF_FL_EXACT_SIZE; + } - /* allocation requested ? 
*/ - if (!(sc->flags & SC_FL_NEED_BUFF)) - return 0; + len = se_nego_ff(sdo, &BUF_NULL, count, nego_flags); + if (sdo->iobuf.flags & IOBUF_FL_NO_FF) { + sc_ep_clr(sc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + TRACE_DEVEL("Fast-forwarding not supported by opposite endpoint, disable it", APPLET_EV_RECV, appctx); + goto end; + } + if (sdo->iobuf.flags & IOBUF_FL_FF_BLOCKED) { + sc_ep_set(sc, /* SE_FL_RCV_MORE | */SE_FL_WANT_ROOM); + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + goto end; + } + + b_add(sdo->iobuf.buf, sdo->iobuf.offset); + ret = appctx->applet->fastfwd(appctx, sdo->iobuf.buf, len, 0); + b_sub(sdo->iobuf.buf, sdo->iobuf.offset); + sdo->iobuf.data += ret; + + if (se_fl_test(appctx->sedesc, SE_FL_WANT_ROOM)) { + /* The applet request more room, report the info at the iobuf level */ + sdo->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + + if (applet_fl_test(appctx, APPCTX_FL_EOI)) { + se_fl_set(appctx->sedesc, SE_FL_EOI); + sdo->iobuf.flags |= IOBUF_FL_EOI; /* TODO: it may be good to have a flag to be sure we can + * forward the EOI the to consumer side + */ + TRACE_STATE("report EOI to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_EOS)) { + se_fl_set(appctx->sedesc, SE_FL_EOS); + TRACE_STATE("report EOS to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_ERROR)) { + se_fl_set(appctx->sedesc, SE_FL_ERROR); + TRACE_STATE("report ERROR to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + /* else */ + /* applet_have_more_data(appctx); */ - sc_have_buff(sc); + if (se_done_ff(sdo) != 0) { + /* Something was forwarding, don't reclaim more room */ + se_fl_clr(appctx->sedesc, SE_FL_WANT_ROOM); + TRACE_STATE("more room available", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + +end: + TRACE_LEAVE(APPLET_EV_RECV, appctx); + return ret; +} - /* was already 
allocated another way ? if so, don't take this one */ - if (c_size(sc_ic(sc)) || sc_ep_have_ff_data(sc_opposite(sc))) - return 0; +/* Atomically append a line to applet <ctx>'s output, appending a trailing LF. + * The line is read from vectors <v1> and <v2> at offset <ofs> relative to the + * area's origin, for <len> bytes. It returns the number of bytes consumed from + * the input vectors on success, -1 if it temporarily cannot (buffer full), -2 + * if it will never be able to (too large msg). The vectors are not modified. + * The caller is responsible for making sure that there are at least ofs+len + * bytes in the input vectors. + */ +ssize_t applet_append_line(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len) +{ + struct appctx *appctx = ctx; - /* allocation possible now ? */ - if (!b_alloc(&sc_ic(sc)->buf)) { - sc_need_buff(sc); - return 0; + if (unlikely(len + 1 > b_size(&trash))) { + /* too large a message to ever fit, let's skip it */ + return -2; } - task_wakeup(appctx->t, TASK_WOKEN_RES); - return 1; + chunk_reset(&trash); + vp_peek_ofs(v1, v2, ofs, trash.area, len); + trash.data += len; + trash.area[trash.data++] = '\n'; + if (applet_putchk(appctx, &trash) == -1) + return -1; + return len; } /* Default applet handler */ @@ -404,13 +758,14 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) { struct appctx *app = context; struct stconn *sc, *sco; + struct channel *ic, *oc; unsigned int rate; - size_t count; + size_t input, output; int did_send = 0; TRACE_ENTER(APPLET_EV_PROCESS, app); - if (app->state & APPLET_WANT_DIE) { + if (applet_fl_test(app, APPCTX_FL_WANT_DIE)) { TRACE_DEVEL("APPCTX want die, release it", APPLET_EV_FREE, app); __appctx_free(app); return NULL; @@ -434,6 +789,9 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) sc = appctx_sc(app); sco = sc_opposite(sc); + ic = sc_ic(sc); + oc = sc_oc(sc); + /* We always pretend the applet can't get and doesn't want to * 
put, it's up to it to change this if needed. This ensures * that one applet which ignores any event will not spin. @@ -450,7 +808,10 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) if (!sc_alloc_ibuf(sc, &app->buffer_wait)) applet_have_more_data(app); - count = co_data(sc_oc(sc)); + channel_check_idletimer(ic); + + input = ic->total; + output = co_data(oc); app->applet->fct(app); TRACE_POINT(APPLET_EV_PROCESS, app); @@ -458,9 +819,9 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) /* now check if the applet has released some room and forgot to * notify the other side about it. */ - if (count != co_data(sc_oc(sc))) { - sc_oc(sc)->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; - if (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed) + if (output != co_data(oc)) { + oc->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; + if (sco->room_needed < 0 || channel_recv_max(oc) >= sco->room_needed) sc_have_room(sco); did_send = 1; } @@ -469,14 +830,18 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) sc_have_room(sco); } - if (sc_ic(sc)->flags & CF_READ_EVENT) + input = ic->total - input; + if (input) { + channel_check_xfer(ic, input); sc_ep_report_read_activity(sc); + } + /* TODO: May be move in appctx_rcv_buf or sc_applet_process ? */ if (sc_waiting_room(sc) && (sc->flags & SC_FL_ABRT_DONE)) { sc_ep_set(sc, SE_FL_EOS|SE_FL_ERROR); } - if (!co_data(sc_oc(sc))) { + if (!co_data(oc)) { if (did_send) sc_ep_report_send_activity(sc); } @@ -495,7 +860,109 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) } sc->app_ops->wake(sc); - channel_release_buffer(sc_ic(sc), &app->buffer_wait); + channel_release_buffer(ic, &app->buffer_wait); + TRACE_LEAVE(APPLET_EV_PROCESS, app); + return t; +} + + +/* Default applet handler based on IN/OUT buffers. 
It is a true task here, no a tasklet */ +struct task *task_process_applet(struct task *t, void *context, unsigned int state) +{ + struct appctx *app = context; + struct stconn *sc; + unsigned int rate; + + TRACE_ENTER(APPLET_EV_PROCESS, app); + + if (applet_fl_test(app, APPCTX_FL_WANT_DIE)) { + TRACE_DEVEL("APPCTX want die, release it", APPLET_EV_FREE, app); + __appctx_free(app); + return NULL; + } + + if (se_fl_test(app->sedesc, SE_FL_ORPHAN)) { + /* Finalize init of orphan appctx. .init callback function must + * be defined and it must finalize appctx startup. + */ + BUG_ON(!app->applet->init); + + if (appctx_init(app) == -1) { + TRACE_DEVEL("APPCTX init failed", APPLET_EV_FREE|APPLET_EV_ERR, app); + appctx_free_on_early_error(app); + return NULL; + } + BUG_ON(!app->sess || !appctx_sc(app) || !appctx_strm(app)); + TRACE_DEVEL("APPCTX initialized", APPLET_EV_PROCESS, app); + } + + sc = appctx_sc(app); + + sc_applet_sync_send(sc); + + /* We always pretend the applet can't get and doesn't want to + * put, it's up to it to change this if needed. This ensures + * that one applet which ignores any event will not spin. + */ + applet_need_more_data(app); + applet_have_no_more_data(app); + + app->applet->fct(app); + + TRACE_POINT(APPLET_EV_PROCESS, app); + + if (b_data(&app->outbuf) || se_fl_test(app->sedesc, SE_FL_MAY_FASTFWD_PROD) || + applet_fl_test(app, APPCTX_FL_EOI|APPCTX_FL_EOS|APPCTX_FL_ERROR)) + applet_have_more_data(app); + + sc_applet_sync_recv(sc); + + /* TODO: May be move in appctx_rcv_buf or sc_applet_process ? 
*/ + if (sc_waiting_room(sc) && (sc->flags & SC_FL_ABRT_DONE)) { + sc_ep_set(sc, SE_FL_EOS|SE_FL_ERROR); + } + + /* measure the call rate and check for anomalies when too high */ + if (((b_size(sc_ib(sc)) && sc->flags & SC_FL_NEED_BUFF) || // asks for a buffer which is present + (b_size(sc_ib(sc)) && !b_data(sc_ib(sc)) && sc->flags & SC_FL_NEED_ROOM) || // asks for room in an empty buffer + (b_data(sc_ob(sc)) && sc_is_send_allowed(sc)) || // asks for data already present + (!b_data(sc_ib(sc)) && b_data(sc_ob(sc)) && // didn't return anything ... + (!(sc_oc(sc)->flags & CF_WRITE_EVENT) && (sc->flags & SC_FL_SHUT_WANTED))))) { // ... and left data pending after a shut + rate = update_freq_ctr(&app->call_rate, 1); + if (rate >= 100000 && app->call_rate.prev_ctr) // looped like this more than 100k times over last second + stream_dump_and_crash(&app->obj_type, read_freq_ctr(&app->call_rate)); + } + + sc->app_ops->wake(sc); + appctx_release_buffers(app); TRACE_LEAVE(APPLET_EV_PROCESS, app); return t; } + +/* config parser for global "tune.applet.zero-copy-forwarding" */ +static int cfg_parse_applet_zero_copy_fwd(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + if (too_many_args(1, args, err, NULL)) + return -1; + + if (strcmp(args[1], "on") == 0) + global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_APPLET; + else if (strcmp(args[1], "off") == 0) + global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_APPLET; + else { + memprintf(err, "'%s' expects 'on' or 'off'.", args[0]); + return -1; + } + return 0; +} + + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.applet.zero-copy-forwarding", cfg_parse_applet_zero_copy_fwd }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/backend.c b/src/backend.c index 39d2c75..d74ae40 100644 --- a/src/backend.c +++ b/src/backend.c @@ -39,6 +39,7 @@ #include 
<haproxy/lb_fwlc.h> #include <haproxy/lb_fwrr.h> #include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/log.h> #include <haproxy/namespace.h> #include <haproxy/obj_type.h> @@ -61,14 +62,6 @@ #define TRACE_SOURCE &trace_strm -int be_lastsession(const struct proxy *be) -{ - if (be->be_counters.last_sess) - return ns_to_sec(now_ns) - be->be_counters.last_sess; - - return -1; -} - /* helper function to invoke the correct hash method */ unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len) { @@ -176,7 +169,7 @@ void update_backend_weight(struct proxy *px) * If any server is found, it will be returned. If no valid server is found, * NULL is returned. */ -static struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid) +struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid) { unsigned int h, l; @@ -220,7 +213,7 @@ static struct server *get_server_sh(struct proxy *px, const char *addr, int len, * algorithm out of a tens because it gave him the best results. * */ -static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid) +struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid) { unsigned int hash = 0; int c; @@ -268,7 +261,7 @@ static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, co * is returned. If any server is found, it will be returned. If no valid server * is found, NULL is returned. 
*/ -static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid) +struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid) { unsigned int hash = 0; const char *start, *end; @@ -327,7 +320,7 @@ static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_l /* * this does the same as the previous server_ph, but check the body contents */ -static struct server *get_server_ph_post(struct stream *s, const struct server *avoid) +struct server *get_server_ph_post(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct channel *req = &s->req; @@ -412,7 +405,7 @@ static struct server *get_server_ph_post(struct stream *s, const struct server * * is found, NULL is returned. When lbprm.arg_opt1 is set, the hash will only * apply to the middle part of a domain name ("use_domain_only" option). */ -static struct server *get_server_hh(struct stream *s, const struct server *avoid) +struct server *get_server_hh(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -485,7 +478,7 @@ static struct server *get_server_hh(struct stream *s, const struct server *avoid } /* RDP Cookie HASH. */ -static struct server *get_server_rch(struct stream *s, const struct server *avoid) +struct server *get_server_rch(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -530,7 +523,7 @@ static struct server *get_server_rch(struct stream *s, const struct server *avoi /* sample expression HASH. Returns NULL if the sample is not found or if there * are no server, relying on the caller to fall back to round robin instead. 
*/ -static struct server *get_server_expr(struct stream *s, const struct server *avoid) +struct server *get_server_expr(struct stream *s, const struct server *avoid) { struct proxy *px = s->be; struct sample *smp; @@ -560,7 +553,7 @@ static struct server *get_server_expr(struct stream *s, const struct server *avo } /* random value */ -static struct server *get_server_rnd(struct stream *s, const struct server *avoid) +struct server *get_server_rnd(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -653,9 +646,9 @@ int assign_server(struct stream *s) if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI && ((s->sess->flags & SESS_FL_PREFER_LAST) || (s->be->options & PR_O_PREF_LAST))) { - struct sess_srv_list *srv_list; - list_for_each_entry(srv_list, &s->sess->srv_list, srv_list) { - struct server *tmpsrv = objt_server(srv_list->target); + struct sess_priv_conns *pconns; + list_for_each_entry(pconns, &s->sess->priv_conns, sess_el) { + struct server *tmpsrv = objt_server(pconns->target); if (tmpsrv && tmpsrv->proxy == s->be && ((s->sess->flags & SESS_FL_PREFER_LAST) || @@ -663,7 +656,7 @@ int assign_server(struct stream *s) server_has_room(tmpsrv) || ( tmpsrv->queue.length + 1 < s->be->max_ka_queue))) && srv_currently_usable(tmpsrv)) { - list_for_each_entry(conn, &srv_list->conn_list, session_list) { + list_for_each_entry(conn, &pconns->conn_list, sess_el) { if (!(conn->flags & CO_FL_WAIT_XPRT)) { srv = tmpsrv; s->target = &srv->obj_type; @@ -813,6 +806,14 @@ int assign_server(struct stream *s) break; default: + if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_SA) { + /* some special algos that cannot be grouped together */ + + if ((s->be->lbprm.algo & BE_LB_PARM) == BE_LB_SA_SS) + srv = ss_get_server(s->be); + + break; + } /* unknown balancing algorithm */ err = SRV_STATUS_INTERNAL; goto out; @@ -1232,7 +1233,7 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is continue; conn = 
srv_lookup_conn(is_safe ? &srv->per_thr[i].safe_conns : &srv->per_thr[i].idle_conns, hash); while (conn) { - if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) { + if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { conn_delete_from_tree(conn); _HA_ATOMIC_INC(&activity[tid].fd_takeover); found = 1; @@ -1245,7 +1246,7 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is if (!found && !is_safe && srv->curr_safe_nb > 0) { conn = srv_lookup_conn(&srv->per_thr[i].safe_conns, hash); while (conn) { - if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) { + if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { conn_delete_from_tree(conn); _HA_ATOMIC_INC(&activity[tid].fd_takeover); found = 1; @@ -1348,9 +1349,7 @@ int connect_server(struct stream *s) int reuse = 0; int init_mux = 0; int err; -#ifdef USE_OPENSSL - struct sample *sni_smp = NULL; -#endif + struct sample *name_smp = NULL; struct sockaddr_storage *bind_addr = NULL; int proxy_line_ret; int64_t hash = 0; @@ -1372,13 +1371,11 @@ int connect_server(struct stream *s) if (err != SRV_STATUS_OK) return SF_ERR_INTERNAL; -#ifdef USE_OPENSSL - if (srv && srv->ssl_ctx.sni) { - sni_smp = sample_fetch_as_type(s->be, s->sess, s, - SMP_OPT_DIR_REQ | SMP_OPT_FINAL, - srv->ssl_ctx.sni, SMP_T_STR); + if (srv && srv->pool_conn_name_expr) { + name_smp = sample_fetch_as_type(s->be, s->sess, s, + SMP_OPT_DIR_REQ | SMP_OPT_FINAL, + srv->pool_conn_name_expr, SMP_T_STR); } -#endif /* do not reuse if mode is not http */ if (!IS_HTX_STRM(s)) { @@ -1402,17 +1399,12 @@ int connect_server(struct stream *s) /* 1. target */ hash_params.target = s->target; -#ifdef USE_OPENSSL - /* 2. sni - * only test if the sample is not null as smp_make_safe (called before - * ssl_sock_set_servername) can only fails if this is not the case - */ - if (sni_smp) { - hash_params.sni_prehash = - conn_hash_prehash(sni_smp->data.u.str.area, - sni_smp->data.u.str.data); + /* 2. 
pool-conn-name */ + if (name_smp) { + hash_params.name_prehash = + conn_hash_prehash(name_smp->data.u.str.area, + name_smp->data.u.str.data); } -#endif /* USE_OPENSSL */ /* 3. destination address */ if (srv && srv_is_transparent(srv)) @@ -1423,13 +1415,43 @@ int connect_server(struct stream *s) /* 5. proxy protocol */ if (srv && srv->pp_opts) { - proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s); + proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s, strm_sess(s)); if (proxy_line_ret) { hash_params.proxy_prehash = conn_hash_prehash(trash.area, proxy_line_ret); } } + /* 6. Custom mark, tos? */ + if (s->flags & (SF_BC_MARK | SF_BC_TOS)) { + /* mark: 32bits, tos: 8bits = 40bits + * last 2 bits are there to indicate if mark and/or tos are set + * total: 42bits: + * + * 63==== (unused) ====42 39----32 31-----------------------------0 + * 0000000000000000000000 11 00000111 00000000000000000000000000000011 + * ^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * || | | + * / \ \ \ + * / \ \ \ + * tos? mark? \ mark value (32bits) + * tos value (8bits) + * ie: in the above example: + * - mark is set, mark = 3 + * - tos is set, tos = 7 + */ + if (s->flags & SF_BC_MARK) { + hash_params.mark_tos_prehash |= s->bc_mark; + /* 41th bit: mark set */ + hash_params.mark_tos_prehash |= 1ULL << 40; + } + if (s->flags & SF_BC_TOS) { + hash_params.mark_tos_prehash |= (uint64_t)s->bc_tos << 32; + /* 42th bit: tos set */ + hash_params.mark_tos_prehash |= 1ULL << 41; + } + } + hash = conn_calculate_hash(&hash_params); /* first, search for a matching connection in the session's idle conns */ @@ -1617,6 +1639,18 @@ skip_reuse: srv_conn->src = bind_addr; bind_addr = NULL; + /* mark? */ + if (s->flags & SF_BC_MARK) { + srv_conn->mark = s->bc_mark; + srv_conn->flags |= CO_FL_OPT_MARK; + } + + /* tos? 
*/ + if (s->flags & SF_BC_TOS) { + srv_conn->tos = s->bc_tos; + srv_conn->flags |= CO_FL_OPT_TOS; + } + srv_conn->hash_node->node.key = hash; } } @@ -1744,7 +1778,13 @@ skip_reuse: return err; #ifdef USE_OPENSSL - if (!(s->flags & SF_SRV_REUSED)) { + /* Set socket SNI unless connection is reused. */ + if (srv && srv->ssl_ctx.sni && !(s->flags & SF_SRV_REUSED)) { + struct sample *sni_smp = NULL; + + sni_smp = sample_fetch_as_type(s->be, s->sess, s, + SMP_OPT_DIR_REQ | SMP_OPT_FINAL, + srv->ssl_ctx.sni, SMP_T_STR); if (smp_make_safe(sni_smp)) ssl_sock_set_servername(srv_conn, sni_smp->data.u.str.area); } @@ -2515,8 +2555,8 @@ void back_handle_st_rdy(struct stream *s) */ void set_backend_down(struct proxy *be) { - be->last_change = ns_to_sec(now_ns); - _HA_ATOMIC_INC(&be->down_trans); + be->be_counters.last_change = ns_to_sec(now_ns); + _HA_ATOMIC_INC(&be->be_counters.down_trans); if (!(global.mode & MODE_STARTING)) { ha_alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id); @@ -2588,10 +2628,10 @@ no_cookie: } int be_downtime(struct proxy *px) { - if (px->lbprm.tot_weight && px->last_change < ns_to_sec(now_ns)) // ignore negative time + if (px->lbprm.tot_weight && px->be_counters.last_change < ns_to_sec(now_ns)) // ignore negative time return px->down_time; - return ns_to_sec(now_ns) - px->last_change + px->down_time; + return ns_to_sec(now_ns) - px->be_counters.last_change + px->down_time; } /* @@ -2836,7 +2876,7 @@ int backend_parse_balance(const char **args, char **err, struct proxy *curproxy) } else if (strcmp(args[0], "sticky") == 0) { curproxy->lbprm.algo &= ~BE_LB_ALGO; - curproxy->lbprm.algo |= BE_LB_ALGO_LS; + curproxy->lbprm.algo |= BE_LB_ALGO_SS; } else { memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options."); @@ -3029,7 +3069,7 @@ smp_fetch_be_sess_rate(const struct arg *args, struct sample *smp, const char *k 
smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->be_sess_per_sec); + smp->data.u.sint = read_freq_ctr(&px->be_counters.sess_per_sec); return 1; } @@ -3212,7 +3252,7 @@ smp_fetch_srv_sess_rate(const struct arg *args, struct sample *smp, const char * { smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&args->data.srv->sess_per_sec); + smp->data.u.sint = read_freq_ctr(&args->data.srv->counters.sess_per_sec); return 1; } diff --git a/src/cache.c b/src/cache.c index 9f12f10..32f2e47 100644 --- a/src/cache.c +++ b/src/cache.c @@ -77,6 +77,7 @@ struct cache_appctx { unsigned int rem_data; /* Remaining bytes for the last data block (HTX only, 0 means process next block) */ unsigned int send_notmodified:1; /* In case of conditional request, we might want to send a "304 Not Modified" response instead of the stored data. */ unsigned int unused:31; + /* 4 bytes hole here */ struct shared_block *next; /* The next block of data to be sent for this cache entry. 
*/ }; @@ -193,7 +194,7 @@ struct cache_entry { unsigned int latest_validation; /* latest validation date */ unsigned int expire; /* expiration date (wall clock time) */ unsigned int age; /* Origin server "Age" header value */ - + unsigned int body_size; /* Size of the body */ int refcount; struct eb32_node eb; /* ebtree node used to hold the cache object */ @@ -231,8 +232,8 @@ DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st)); static struct eb32_node *insert_entry(struct cache *cache, struct cache_tree *tree, struct cache_entry *new_entry); static void delete_entry(struct cache_entry *del_entry); -static void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry); -static void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry); +static inline void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry); +static inline void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry); /* * Find a cache_entry in the <cache>'s tree that has the hash <hash>. 
@@ -753,6 +754,7 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms struct htx_blk *blk; struct shared_block *fb; struct htx_ret htxret; + size_t data_len = 0; unsigned int orig_len, to_forward; int ret; @@ -789,6 +791,7 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms chunk_memcat(&trash, (char *)&info, sizeof(info)); chunk_istcat(&trash, v); to_forward += v.len; + data_len += v.len; len -= v.len; break; @@ -817,6 +820,8 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms goto no_cache; } + /* disguise below to shut a warning on */ + DISGUISE((struct cache_entry *)st->first_block->data)->body_size += data_len; ret = shctx_row_data_append(shctx, st->first_block, (unsigned char *)b_head(&trash), b_data(&trash)); if (ret < 0) @@ -1133,7 +1138,7 @@ static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature) * "vary" on the accept-encoding value. * Returns 0 if we found a known encoding in the response, -1 otherwise. */ -static int set_secondary_key_encoding(struct htx *htx, char *secondary_key) +static int set_secondary_key_encoding(struct htx *htx, unsigned int vary_signature, char *secondary_key) { unsigned int resp_encoding_bitmap = 0; const struct vary_hashing_information *info = vary_information; @@ -1143,6 +1148,11 @@ static int set_secondary_key_encoding(struct htx *htx, char *secondary_key) unsigned int encoding_value; struct http_hdr_ctx ctx = { .blk = NULL }; + /* We must not set the accept encoding part of the secondary signature + * if the response does not vary on 'Accept Encoding'. */ + if (!(vary_signature & VARY_ACCEPT_ENCODING)) + return 0; + /* Look for the accept-encoding part of the secondary_key. 
*/ while (count < hash_info_count && info->value != VARY_ACCEPT_ENCODING) { offset += info->hash_length; @@ -1404,7 +1414,7 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px, * We will not cache a response that has an unknown encoding (not * explicitly supported in parse_encoding_value function). */ if (cache->vary_processing_enabled && vary_signature) - if (set_secondary_key_encoding(htx, object->secondary_key)) + if (set_secondary_key_encoding(htx, vary_signature, object->secondary_key)) goto out; if (!shctx_row_reserve_hot(shctx, first, trash.data)) { @@ -1480,8 +1490,7 @@ static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, e unsigned int max, total; uint32_t blksz; - max = htx_get_max_blksz(htx, - channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx)); + max = htx_free_data_space(htx); if (!max) return 0; blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR) @@ -1521,14 +1530,14 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h struct cache_appctx *ctx = appctx->svcctx; struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; struct shared_context *shctx = shctx_ptr(cconf->c.cache); - unsigned int max, total, rem_data; + unsigned int max, total, rem_data, data_len; uint32_t blksz; - max = htx_get_max_blksz(htx, - channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx)); + max = htx_free_data_space(htx); if (!max) return 0; + data_len = 0; rem_data = 0; if (ctx->rem_data) { blksz = ctx->rem_data; @@ -1551,6 +1560,7 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h offset += sz; blksz -= sz; total += sz; + data_len += sz; if (sz < max) break; if (blksz || offset == shctx->block_size) { @@ -1563,6 +1573,7 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h ctx->next = shblk; ctx->sent += total; ctx->rem_data = rem_data + blksz; + appctx->to_forward -= data_len; return total; } @@ -1619,6 +1630,108 @@ static 
size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigne return total; } +static unsigned int ff_cache_dump_data_blk(struct appctx *appctx, struct buffer *buf, unsigned int len, + uint32_t info, struct shared_block *shblk, unsigned int offset) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; + struct shared_context *shctx = shctx_ptr(cconf->c.cache); + unsigned int total, rem_data, data_len; + uint32_t blksz; + + total = 0; + data_len = 0; + rem_data = 0; + if (ctx->rem_data) + blksz = ctx->rem_data; + else { + blksz = (info & 0xfffffff); + ctx->sent += 4; + } + if (blksz > len) { + rem_data = blksz - len; + blksz = len; + } + + while (blksz) { + size_t sz; + + len = MIN(blksz, shctx->block_size - offset); + sz = b_putblk(buf, (char *)(shblk->data + offset), len); + offset += sz; + blksz -= sz; + total += sz; + data_len += sz; + if (sz < len) + break; + if (blksz || offset == shctx->block_size) { + shblk = LIST_NEXT(&shblk->list, typeof(shblk), list); + offset = 0; + } + } + + ctx->offset = offset; + ctx->next = shblk; + ctx->sent += total; + ctx->rem_data = rem_data + blksz; + appctx->to_forward -= data_len; + return total; +} + +static size_t ff_cache_dump_msg(struct appctx *appctx, struct buffer *buf, unsigned int len) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_entry *cache_ptr = ctx->entry; + struct shared_block *first = block_ptr(cache_ptr); + struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; + struct shared_context *shctx = shctx_ptr(cconf->c.cache); + struct shared_block *shblk; + unsigned int offset, sz; + unsigned int ret, total = 0; + + while (len && (ctx->sent != first->len - sizeof(*cache_ptr))) { + enum htx_blk_type type; + uint32_t info; + + shblk = ctx->next; + offset = ctx->offset; + if (ctx->rem_data) { + type = HTX_BLK_DATA; + info = 0; + goto add_data_blk; + } + + /* Get info of the next HTX block. 
May be split on 2 shblk */ + sz = MIN(4, shctx->block_size - offset); + memcpy((char *)&info, (const char *)shblk->data + offset, sz); + offset += sz; + if (sz < 4) { + shblk = LIST_NEXT(&shblk->list, typeof(shblk), list); + memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz); + offset = (4 - sz); + } + + /* Get payload of the next HTX block and insert it. */ + type = (info >> 28); + if (type == HTX_BLK_DATA) { + add_data_blk: + ret = ff_cache_dump_data_blk(appctx, buf, len, info, shblk, offset); + } + else + ret = 0; + + if (!ret) + break; + total += ret; + len -= ret; + + if (ctx->rem_data) + break; + } + + return total; +} + static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx) { struct cache_appctx *ctx = appctx->svcctx; @@ -1637,31 +1750,58 @@ static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx) return 1; } +static size_t http_cache_fastfwd(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_entry *cache_ptr = ctx->entry; + struct shared_block *first = block_ptr(cache_ptr); + size_t ret; + + BUG_ON(!appctx->to_forward || count > appctx->to_forward); + + ret = ff_cache_dump_msg(appctx, buf, count); + + if (!appctx->to_forward) { + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + if (ctx->sent == first->len - sizeof(*cache_ptr)) { + applet_set_eoi(appctx); + applet_set_eos(appctx); + appctx->st0 = HTX_CACHE_END; + } + } + return ret; +} + static void http_cache_io_handler(struct appctx *appctx) { struct cache_appctx *ctx = appctx->svcctx; struct cache_entry *cache_ptr = ctx->entry; struct shared_block *first = block_ptr(cache_ptr); - struct stconn *sc = appctx_sc(appctx); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct htx *req_htx, *res_htx; + struct htx *res_htx = NULL; struct buffer *errmsg; unsigned int len; - size_t ret, total = 0; + size_t ret; 
- res_htx = htx_from_buf(&res->buf); - total = res_htx->data; + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL)) + goto exit; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) - goto out; + if (applet_fl_test(appctx, APPCTX_FL_FASTFWD) && se_fl_test(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD)) + goto exit; - /* Check if the input buffer is available. */ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); - goto out; + if (!appctx_get_buf(appctx, &appctx->outbuf)) { + goto exit; + } + + if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) { + goto exit; } + res_htx = htx_from_buf(&appctx->outbuf); + + len = first->len - sizeof(*cache_ptr) - ctx->sent; + res_htx = htx_from_buf(&appctx->outbuf); + if (appctx->st0 == HTX_CACHE_INIT) { ctx->next = block_ptr(cache_ptr); ctx->offset = sizeof(*cache_ptr); @@ -1671,8 +1811,13 @@ static void http_cache_io_handler(struct appctx *appctx) } if (appctx->st0 == HTX_CACHE_HEADER) { + struct ist meth; + + if (unlikely(applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC))) { + goto exit; + } + /* Headers must be dump at once. Otherwise it is an error */ - len = first->len - sizeof(*cache_ptr) - ctx->sent; ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH); if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) || !htx_cache_add_age_hdr(appctx, res_htx)) @@ -1689,60 +1834,66 @@ static void http_cache_io_handler(struct appctx *appctx) /* Skip response body for HEAD requests or in case of "304 Not * Modified" response. 
*/ - if (__sc_strm(sc)->txn->meth == HTTP_METH_HEAD || ctx->send_notmodified) + meth = htx_sl_req_meth(http_get_stline(htxbuf(&appctx->inbuf))); + if (find_http_meth(istptr(meth), istlen(meth)) == HTTP_METH_HEAD || ctx->send_notmodified) appctx->st0 = HTX_CACHE_EOM; - else + else { + if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_APPLET)) + se_fl_set(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + + appctx->to_forward = cache_ptr->body_size; + len = first->len - sizeof(*cache_ptr) - ctx->sent; appctx->st0 = HTX_CACHE_DATA; + } } if (appctx->st0 == HTX_CACHE_DATA) { - len = first->len - sizeof(*cache_ptr) - ctx->sent; if (len) { ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_UNUSED); if (ret < len) { - sc_need_room(sc, channel_htx_recv_max(res, res_htx) + 1); + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); goto out; } } + BUG_ON(appctx->to_forward); appctx->st0 = HTX_CACHE_EOM; } if (appctx->st0 == HTX_CACHE_EOM) { /* no more data are expected. */ res_htx->flags |= HTX_FL_EOM; - se_fl_set(appctx->sedesc, SE_FL_EOI); - + applet_set_eoi(appctx); + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); appctx->st0 = HTX_CACHE_END; } end: - if (appctx->st0 == HTX_CACHE_END) - se_fl_set(appctx->sedesc, SE_FL_EOS); + if (appctx->st0 == HTX_CACHE_END) { + applet_set_eos(appctx); + } out: - total = res_htx->data - total; - if (total) - channel_add_input(res, total); - htx_to_buf(res_htx, &res->buf); + if (res_htx) + htx_to_buf(res_htx, &appctx->outbuf); + exit: /* eat the whole request */ - if (co_data(req)) { - req_htx = htx_from_buf(&req->buf); - co_htx_skip(req, req_htx, co_data(req)); - htx_to_buf(req_htx, &req->buf); - } + b_reset(&appctx->inbuf); + applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL); + appctx->sedesc->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED; return; error: /* Sent and HTTP error 500 */ - b_reset(&res->buf); + b_reset(&appctx->outbuf); errmsg = &http_err_chunks[HTTP_ERR_500]; - res->buf.data = b_data(errmsg); - 
memcpy(res->buf.area, b_head(errmsg), b_data(errmsg)); - res_htx = htx_from_buf(&res->buf); + appctx->outbuf.data = b_data(errmsg); + memcpy(appctx->outbuf.area, b_head(errmsg), b_data(errmsg)); + res_htx = htx_from_buf(&appctx->outbuf); - total = 0; - se_fl_set(appctx->sedesc, SE_FL_ERROR); + applet_set_eos(appctx); + applet_set_error(appctx); appctx->st0 = HTX_CACHE_END; goto end; } @@ -2324,7 +2475,7 @@ int post_check_cache() list_for_each_entry_safe(cache_config, back, &caches_config, list) { ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE, - cache_config->maxobjsz, sizeof(struct cache)); + cache_config->maxobjsz, sizeof(struct cache), cache_config->id); if (ret_shctx <= 0) { if (ret_shctx == SHCTX_E_INIT_LOCK) @@ -2995,9 +3146,13 @@ struct applet http_cache_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<CACHE>", /* used for logging */ .fct = http_cache_io_handler, + .rcv_buf = appctx_htx_rcv_buf, + .snd_buf = appctx_htx_snd_buf, + .fastfwd = http_cache_fastfwd, .release = http_cache_applet_release, }; + /* config parsers for this section */ REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache); REGISTER_POST_CHECK(post_check_cache); diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c index f31e7a0..452c0e5 100644 --- a/src/cfgparse-global.c +++ b/src/cfgparse-global.c @@ -36,8 +36,7 @@ static const char *common_kw_list[] = { "insecure-fork-wanted", "insecure-setuid-wanted", "nosplice", "nogetaddrinfo", "noreuseport", "quiet", "zero-warning", "tune.runqueue-depth", "tune.maxpollevents", "tune.maxaccept", - "tune.recv_enough", "tune.buffers.limit", - "tune.buffers.reserve", "tune.bufsize", "tune.maxrewrite", + "tune.recv_enough", "tune.bufsize", "tune.maxrewrite", "tune.idletimer", "tune.rcvbuf.client", "tune.rcvbuf.server", "tune.sndbuf.client", "tune.sndbuf.server", "tune.pipesize", "tune.http.cookielen", "tune.http.logurilen", "tune.http.maxhdr", @@ -52,6 +51,7 @@ static const char *common_kw_list[] 
= { "presetenv", "unsetenv", "resetenv", "strict-limits", "localpeer", "numa-cpu-mapping", "defaults", "listen", "frontend", "backend", "peers", "resolvers", "cluster-secret", "no-quic", "limited-quic", + "stats-file", NULL /* must be last */ }; @@ -75,6 +75,9 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) alertif_too_many_args(0, file, linenum, args, &err_code); goto out; } + else if (strcmp(args[0], "expose-deprecated-directives") == 0) { + deprecated_directives_allowed = 1; + } else if (strcmp(args[0], "expose-experimental-directives") == 0) { experimental_directives_allowed = 1; } @@ -263,36 +266,6 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) } global.tune.recv_enough = atol(args[1]); } - else if (strcmp(args[0], "tune.buffers.limit") == 0) { - if (alertif_too_many_args(1, file, linenum, args, &err_code)) - goto out; - if (*(args[1]) == 0) { - ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - global.tune.buf_limit = atol(args[1]); - if (global.tune.buf_limit) { - if (global.tune.buf_limit < 3) - global.tune.buf_limit = 3; - if (global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; - } - } - else if (strcmp(args[0], "tune.buffers.reserve") == 0) { - if (alertif_too_many_args(1, file, linenum, args, &err_code)) - goto out; - if (*(args[1]) == 0) { - ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - global.tune.reserved_bufs = atol(args[1]); - if (global.tune.reserved_bufs < 2) - global.tune.reserved_bufs = 2; - if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; - } else if (strcmp(args[0], "tune.bufsize") == 0) { if (alertif_too_many_args(1, file, linenum, args, &err_code)) 
goto out; @@ -1028,6 +1001,21 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) global.server_state_file = strdup(args[1]); } + else if (strcmp(args[0], "stats-file") == 0) { /* path to the file where HAProxy can load the server states */ + if (global.stats_file != NULL) { + ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]); + err_code |= ERR_ALERT; + goto out; + } + + if (!*(args[1])) { + ha_alert("parsing [%s:%d] : '%s' expect one argument: a file path.\n", file, linenum, args[0]); + err_code |= ERR_FATAL; + goto out; + } + + global.stats_file = strdup(args[1]); + } else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */ if (alertif_too_many_args(1, file, linenum, args, &err_code)) goto out; @@ -1388,8 +1376,59 @@ static int cfg_parse_prealloc_fd(char **args, int section_type, struct proxy *cu return 0; } +/* Parser for harden.reject-privileged-ports.{tcp|quic}. */ +static int cfg_parse_reject_privileged_ports(char **args, int section_type, + struct proxy *curpx, + const struct proxy *defpx, + const char *file, int line, char **err) +{ + struct ist proto; + char onoff; + + if (!*(args[1])) { + memprintf(err, "'%s' expects either 'on' or 'off'.", args[0]); + return -1; + } + + proto = ist(args[0]); + while (istlen(istfind(proto, '.'))) + proto = istadv(istfind(proto, '.'), 1); + + if (strcmp(args[1], "on") == 0) { + onoff = 1; + } + else if (strcmp(args[1], "off") == 0) { + onoff = 0; + } + else { + memprintf(err, "'%s' expects either 'on' or 'off'.", args[0]); + return -1; + } + + if (istmatch(proto, ist("tcp"))) { + if (!onoff) + global.clt_privileged_ports |= HA_PROTO_TCP; + else + global.clt_privileged_ports &= ~HA_PROTO_TCP; + } + else if (istmatch(proto, ist("quic"))) { + if (!onoff) + global.clt_privileged_ports |= HA_PROTO_QUIC; + else + global.clt_privileged_ports &= ~HA_PROTO_QUIC; + } + else { + memprintf(err, "invalid protocol for '%s'.", args[0]); + return -1; + } + 
+ return 0; +} + static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "prealloc-fd", cfg_parse_prealloc_fd }, + { CFG_GLOBAL, "harden.reject-privileged-ports.tcp", cfg_parse_reject_privileged_ports }, + { CFG_GLOBAL, "harden.reject-privileged-ports.quic", cfg_parse_reject_privileged_ports }, { 0, NULL, NULL }, }}; diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c index a97b1e5..9ee8174 100644 --- a/src/cfgparse-listen.c +++ b/src/cfgparse-listen.c @@ -1819,13 +1819,13 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm) if (!stats_check_init_uri_auth(&curproxy->uri_auth)) goto alloc_error; } else if (strcmp(args[1], "hide-version") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_HIDEVER)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_HIDEVER)) goto alloc_error; } else if (strcmp(args[1], "show-legends") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_SHLGNDS)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_SHLGNDS)) goto alloc_error; } else if (strcmp(args[1], "show-modules") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_SHMODULES)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_SHMODULES)) goto alloc_error; } else if (strcmp(args[1], "show-node") == 0) { @@ -2096,33 +2096,27 @@ stats_error_parsing: if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code)) goto out; } - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; char *clflogformat = ""; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == 
clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; if (logformat == clf_http_log_format) clflogformat = " clf"; ha_warning("parsing [%s:%d]: 'option httplog%s' overrides previous '%s' in 'defaults' section.\n", file, linenum, clflogformat, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = logformat; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = logformat; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) { ha_warning("parsing [%s:%d] : backend '%s' : 'option httplog' directive is ignored in backends.\n", @@ -2131,31 +2125,25 @@ stats_error_parsing: } } else if (strcmp(args[1], "tcplog") == 0) { - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == 
clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'option tcplog' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } /* generate a detailed TCP log */ - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = default_tcp_log_format; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = default_tcp_log_format; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code)) goto out; @@ -2170,30 +2158,24 @@ stats_error_parsing: char *logformat; /* generate a complete HTTP log */ logformat = default_https_log_format; - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == clf_http_log_format) oldlogformat = "option httplog clf"; - else if 
(curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'option httplog' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = logformat; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = logformat; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) { ha_warning("parsing [%s:%d] : backend '%s' : 'option httpslog' directive is ignored in backends.\n", @@ -2591,14 +2573,12 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } - free(curproxy->conf.uniqueid_format_string); - curproxy->conf.uniqueid_format_string = strdup(args[1]); - if (!curproxy->conf.uniqueid_format_string) + lf_expr_deinit(&curproxy->format_unique_id); + curproxy->format_unique_id.str = strdup(args[1]); + if (!curproxy->format_unique_id.str) goto alloc_error; - - free(curproxy->conf.uif_file); - curproxy->conf.uif_file = strdup(curproxy->conf.args.file); - curproxy->conf.uif_line = curproxy->conf.args.line; + curproxy->format_unique_id.conf.file = strdup(curproxy->conf.args.file); + curproxy->format_unique_id.conf.line = curproxy->conf.args.line; } else if (strcmp(args[0], "unique-id-header") == 0) { @@ -2630,32 +2610,26 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } 
- if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'log-format' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = strdup(args[1]); - if (!curproxy->conf.logformat_string) + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = strdup(args[1]); + if (!curproxy->logformat.str) goto alloc_error; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; /* get a chance to improve log-format error reporting by * reporting the correct line-number when possible. 
@@ -2678,15 +2652,12 @@ stats_error_parsing: goto out; } - if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(curproxy->conf.logformat_sd_string); - curproxy->conf.logformat_sd_string = strdup(args[1]); - if (!curproxy->conf.logformat_sd_string) + lf_expr_deinit(&curproxy->logformat_sd); + curproxy->logformat_sd.str = strdup(args[1]); + if (!curproxy->logformat_sd.str) goto alloc_error; - - free(curproxy->conf.lfsd_file); - curproxy->conf.lfsd_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfsd_line = curproxy->conf.args.line; + curproxy->logformat_sd.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat_sd.conf.line = curproxy->conf.args.line; /* get a chance to improve log-format-sd error reporting by * reporting the correct line-number when possible. @@ -2708,18 +2679,17 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } - if (curproxy->conf.error_logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat_error.str && curproxy->cap & PR_CAP_DEF) { ha_warning("parsing [%s:%d]: 'error-log-format' overrides previous 'error-log-format' in 'defaults' section.\n", file, linenum); } - free(curproxy->conf.error_logformat_string); - curproxy->conf.error_logformat_string = strdup(args[1]); - if (!curproxy->conf.error_logformat_string) + lf_expr_deinit(&curproxy->logformat_error); + curproxy->logformat_error.str = strdup(args[1]); + if (!curproxy->logformat_error.str) goto alloc_error; - free(curproxy->conf.elfs_file); - curproxy->conf.elfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.elfs_line = curproxy->conf.args.line; + curproxy->logformat_error.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat_error.conf.line = curproxy->conf.args.line;; /* get a chance to improve log-format error reporting by * reporting the correct line-number when possible. 
diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c index 3b38efa..4a23bf2 100644 --- a/src/cfgparse-quic.c +++ b/src/cfgparse-quic.c @@ -235,6 +235,8 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type, suffix = args[0] + prefix_len; if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) global.tune.quic_streams_buf = arg; + else if (strcmp(suffix, "frontend.glitches-threshold") == 0) + global.tune.quic_frontend_glitches_threshold = arg; else if (strcmp(suffix, "frontend.max-streams-bidi") == 0) global.tune.quic_frontend_max_streams_bidi = arg; else if (strcmp(suffix, "max-frame-loss") == 0) @@ -257,35 +259,56 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type, return 0; } -/* config parser for global "tune.quic.zero-copy-fwd-send" */ -static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) +/* config parser for global "tune.quic.* {on|off}" */ +static int cfg_parse_quic_tune_on_off(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { + int on; + int prefix_len = strlen("tune.quic."); + const char *suffix; + if (too_many_args(1, args, err, NULL)) return -1; if (strcmp(args[1], "on") == 0) - global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + on = 1; else if (strcmp(args[1], "off") == 0) - global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + on = 0; else { memprintf(err, "'%s' expects 'on' or 'off'.", args[0]); return -1; } + + suffix = args[0] + prefix_len; + if (strcmp(suffix, "zero-copy-fwd-send") == 0 ) { + if (on) + global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + else + global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + } + else if (strcmp(suffix, "cc-hystart") == 0) { + if (on) + global.tune.options |= GTUNE_QUIC_CC_HYSTART; + else + global.tune.options &= ~GTUNE_QUIC_CC_HYSTART; + } + 
return 0; } static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner }, { CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time }, + { CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off }, { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting }, + { CFG_GLOBAL, "tune.quic.frontend.glitches-threshold", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time }, { CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting }, - { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd }, + { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_tune_on_off }, { 0, NULL, NULL } }}; diff --git a/src/cfgparse-ssl.c b/src/cfgparse-ssl.c index 5666336..e7a7d47 100644 --- a/src/cfgparse-ssl.c +++ b/src/cfgparse-ssl.c @@ -777,22 +777,23 @@ static int bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, s static int bind_parse_crt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { char path[MAXPATHLEN]; + int default_crt = *args[cur_arg] == 'd' ? 
1 : 0; if (!*args[cur_arg + 1]) { memprintf(err, "'%s' : missing certificate location", args[cur_arg]); return ERR_ALERT | ERR_FATAL; } - if ((*args[cur_arg + 1] != '/' ) && global_ssl.crt_base) { + if ((*args[cur_arg + 1] != '@') && (*args[cur_arg + 1] != '/' ) && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(args[cur_arg + 1]) + 1) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, args[cur_arg + 1]) > sizeof(path)) { memprintf(err, "'%s' : path too long", args[cur_arg]); return ERR_ALERT | ERR_FATAL; } - return ssl_sock_load_cert(path, conf, err); + return ssl_sock_load_cert(path, conf, default_crt, err); } - return ssl_sock_load_cert(args[cur_arg + 1], conf, err); + return ssl_sock_load_cert(args[cur_arg + 1], conf, default_crt, err); } /* parse the "crt-list" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */ @@ -1472,35 +1473,6 @@ static int bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, st return ssl_bind_parse_no_ca_names(args, cur_arg, px, &conf->ssl_conf, 0, err); } - -static int ssl_bind_parse_ocsp_update(char **args, int cur_arg, struct proxy *px, - struct ssl_bind_conf *ssl_conf, int from_cli, char **err) -{ - if (!*args[cur_arg + 1]) { - memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]); - return ERR_ALERT | ERR_FATAL; - } - - if (strcmp(args[cur_arg + 1], "on") == 0) - ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_ON; - else if (strcmp(args[cur_arg + 1], "off") == 0) - ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_OFF; - else { - memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]); - return ERR_ALERT | ERR_FATAL; - } - - if (ssl_conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_ON) { - /* We might need to create the main ocsp update task */ - int ret = ssl_create_ocsp_update_task(err); - if (ret) - return ret; - } - - return 0; -} - - /***************************** "server" keywords Parsing ********************************************/ /* parse the 
"npn" bind keyword */ @@ -1827,7 +1799,7 @@ static int srv_parse_crt(char **args, int *cur_arg, struct proxy *px, struct ser return ERR_ALERT | ERR_FATAL; } - if ((*args[*cur_arg + 1] != '/') && global_ssl.crt_base) + if ((*args[*cur_arg + 1] != '@') && (*args[*cur_arg + 1] != '/') && global_ssl.crt_base) memprintf(&newsrv->ssl_ctx.client_crt, "%s/%s", global_ssl.crt_base, args[*cur_arg + 1]); else memprintf(&newsrv->ssl_ctx.client_crt, "%s", args[*cur_arg + 1]); @@ -2092,16 +2064,23 @@ static int ssl_parse_default_server_options(char **args, int section_type, struc return 0; } -/* parse the "ca-base" / "crt-base" keywords in global section. +/* parse the "ca-base" / "crt-base" / "key-base" keywords in global section. * Returns <0 on alert, >0 on warning, 0 on success. */ -static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct proxy *curpx, +static int ssl_parse_global_path_base(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, const char *file, int line, char **err) { char **target; - target = (args[0][1] == 'a') ? &global_ssl.ca_base : &global_ssl.crt_base; + if (args[0][1] == 'a') + target = &global_ssl.ca_base; + else if (args[0][1] == 'r') + target = &global_ssl.crt_base; + else if (args[0][1] == 'e') + target = &global_ssl.key_base; + else + return -1; if (too_many_args(1, args, err, NULL)) return -1; @@ -2119,77 +2098,56 @@ static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct pr return 0; } -/* parse the "ssl-skip-self-issued-ca" keyword in global section. */ -static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, +/* parse the "ssl-security-level" keyword in global section. 
*/ +static int ssl_parse_security_level(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int linenum, char **err) { -#ifdef SSL_CTX_build_cert_chain - global_ssl.skip_self_issued_ca = 1; - return 0; -#else - memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]); +#ifndef HAVE_SSL_SET_SECURITY_LEVEL + memprintf(err, "global statement '%s' requires at least OpenSSL 1.1.1.", args[0]); return -1; -#endif -} - - -static int ssl_parse_global_ocsp_maxdelay(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) -{ - int value = 0; +#else + char *endptr; - if (*(args[1]) == 0) { - memprintf(err, "'%s' expects an integer argument.", args[0]); + if (!*args[1]) { + ha_alert("parsing [%s:%d] : '%s' : missing value\n", file, linenum, args[0]); return -1; } - value = atoi(args[1]); - if (value < 0) { - memprintf(err, "'%s' expects a positive numeric value.", args[0]); + global_ssl.security_level = strtol(args[1], &endptr, 10); + if (*endptr != '\0') { + ha_alert("parsing [%s:%d] : '%s' : expects an integer argument, found '%s'\n", + file, linenum, args[0], args[1]); return -1; } - if (global_ssl.ocsp_update.delay_min > value) { - memprintf(err, "'%s' can not be lower than tune.ssl.ocsp-update.mindelay.", args[0]); + if (global_ssl.security_level < 0 || global_ssl.security_level > 5) { + ha_alert("parsing [%s:%d] : '%s' : expects a value between 0 and 5\n", + file, linenum, args[0]); return -1; } - - global_ssl.ocsp_update.delay_max = value; +#endif return 0; } -static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) +/* parse the "ssl-skip-self-issued-ca" keyword in global section. 
*/ +static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - int value = 0; - - if (*(args[1]) == 0) { - memprintf(err, "'%s' expects an integer argument.", args[0]); - return -1; - } - - value = atoi(args[1]); - if (value < 0) { - memprintf(err, "'%s' expects a positive numeric value.", args[0]); - return -1; - } - - if (value > global_ssl.ocsp_update.delay_max) { - memprintf(err, "'%s' can not be higher than tune.ssl.ocsp-update.maxdelay.", args[0]); - return -1; - } - - global_ssl.ocsp_update.delay_min = value; - +#ifdef SSL_CTX_build_cert_chain + global_ssl.skip_self_issued_ca = 1; return 0; +#else + memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]); + return -1; +#endif } + /* Note: must not be declared <const> as its list will be overwritten. * Please take care of keeping this list alphabetically sorted, doing so helps * all code contributors. @@ -2199,7 +2157,12 @@ static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct */ /* the <ssl_crtlist_kws> keywords are used for crt-list parsing, they *MUST* be safe - * with their proxy argument NULL and must only fill the ssl_bind_conf */ + * with their proxy argument NULL and must only fill the ssl_bind_conf + * + * /!\ Please update configuration.txt at the crt-list option of the Bind options + * section when adding a keyword in ssl_crtlist_kws. 
/!\ + * + */ struct ssl_crtlist_kw ssl_crtlist_kws[] = { { "allow-0rtt", ssl_bind_parse_allow_0rtt, 0 }, /* allow 0-RTT */ { "alpn", ssl_bind_parse_alpn, 1 }, /* set ALPN supported protocols */ @@ -2218,7 +2181,6 @@ struct ssl_crtlist_kw ssl_crtlist_kws[] = { { "ssl-min-ver", ssl_bind_parse_tls_method_minmax,1 }, /* minimum version */ { "ssl-max-ver", ssl_bind_parse_tls_method_minmax,1 }, /* maximum version */ { "verify", ssl_bind_parse_verify, 1 }, /* set SSL verify method */ - { "ocsp-update", ssl_bind_parse_ocsp_update, 1 }, /* ocsp update mode (on or off) */ { NULL, NULL, 0 }, }; @@ -2240,6 +2202,7 @@ static struct bind_kw_list bind_kws = { "SSL", { }, { { "crt-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth == 0 */ { "crt-list", bind_parse_crt_list, 1 }, /* load a list of crt from this location */ { "curves", bind_parse_curves, 1 }, /* set SSL curve suite */ + { "default-crt", bind_parse_crt, 1 }, /* load SSL certificates from this location */ { "ecdhe", bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */ { "force-sslv3", bind_parse_tls_method_options, 0 }, /* force SSLv3 */ { "force-tlsv10", bind_parse_tls_method_options, 0 }, /* force TLSv10 */ @@ -2323,8 +2286,9 @@ static struct srv_kw_list srv_kws = { "SSL", { }, { INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws); static struct cfg_kw_list cfg_kws = {ILH, { - { CFG_GLOBAL, "ca-base", ssl_parse_global_ca_crt_base }, - { CFG_GLOBAL, "crt-base", ssl_parse_global_ca_crt_base }, + { CFG_GLOBAL, "ca-base", ssl_parse_global_path_base }, + { CFG_GLOBAL, "crt-base", ssl_parse_global_path_base }, + { CFG_GLOBAL, "key-base", ssl_parse_global_path_base }, { CFG_GLOBAL, "issuers-chain-path", ssl_load_global_issuers_from_path }, { CFG_GLOBAL, "maxsslconn", ssl_parse_global_int }, { CFG_GLOBAL, "ssl-default-bind-options", ssl_parse_default_bind_options }, @@ -2341,6 +2305,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, 
"ssl-provider", ssl_parse_global_ssl_provider }, { CFG_GLOBAL, "ssl-provider-path", ssl_parse_global_ssl_provider_path }, #endif + { CFG_GLOBAL, "ssl-security-level", ssl_parse_security_level }, { CFG_GLOBAL, "ssl-skip-self-issued-ca", ssl_parse_skip_self_issued_ca }, { CFG_GLOBAL, "tune.ssl.cachesize", ssl_parse_global_int }, #ifndef OPENSSL_NO_DH @@ -2372,10 +2337,6 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "ssl-default-server-ciphersuites", ssl_parse_global_ciphersuites }, { CFG_GLOBAL, "ssl-load-extra-files", ssl_parse_global_extra_files }, { CFG_GLOBAL, "ssl-load-extra-del-ext", ssl_parse_global_extra_noext }, -#ifndef OPENSSL_NO_OCSP - { CFG_GLOBAL, "tune.ssl.ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, - { CFG_GLOBAL, "tune.ssl.ocsp-update.mindelay", ssl_parse_global_ocsp_mindelay }, -#endif { 0, NULL, NULL }, }}; diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c index a4f6f29..2f68daf 100644 --- a/src/cfgparse-tcp.c +++ b/src/cfgparse-tcp.c @@ -169,6 +169,8 @@ static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, stru ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]); return ERR_ALERT | ERR_FATAL; } + global.last_checks |= LSTCHK_SYSADM; + return 0; } #endif diff --git a/src/cfgparse.c b/src/cfgparse.c index bee3040..f5cde50 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -64,6 +64,7 @@ #include <haproxy/lb_fwlc.h> #include <haproxy/lb_fwrr.h> #include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/sink.h> @@ -633,8 +634,6 @@ static struct peer *cfg_peers_add_peer(struct peers *peers, p->conf.file = strdup(file); p->conf.line = linenum; p->last_change = ns_to_sec(now_ns); - p->xprt = xprt_get(XPRT_RAW); - p->sock_init_arg = NULL; HA_SPIN_INIT(&p->lock); if (id) p->id = strdup(id); @@ -659,6 +658,7 @@ static struct peer *cfg_peers_add_peer(struct peers *peers, int cfg_parse_peers(const char *file, int linenum, char 
**args, int kwm) { static struct peers *curpeers = NULL; + static struct sockaddr_storage *bind_addr = NULL; static int nb_shards = 0; struct peer *newpeer = NULL; const char *err; @@ -729,12 +729,20 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) * Newly allocated listener is at the end of the list */ l = LIST_ELEM(bind_conf->listeners.p, typeof(l), by_bind); + bind_addr = &l->rx.addr; global.maxsock++; /* for the listening socket */ bind_line = 1; if (cfg_peers->local) { + /* Local peer already defined using "server" line has no + * address yet, we should update its server's addr:port + * settings + */ newpeer = cfg_peers->local; + BUG_ON(!newpeer->srv); + newpeer->srv->addr = *bind_addr; + newpeer->srv->svc_port = get_host_port(bind_addr); } else { /* This peer is local. @@ -747,8 +755,6 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) goto out; } } - newpeer->addr = l->rx.addr; - newpeer->proto = l->rx.proto; cur_arg++; } @@ -779,6 +785,7 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) else if (strcmp(args[0], "peers") == 0) { /* new peers section */ /* Initialize these static variables when entering a new "peers" section*/ bind_line = peer_line = 0; + bind_addr = NULL; if (!*args[1]) { ha_alert("parsing [%s:%d] : missing name for peers section.\n", file, linenum); err_code |= ERR_ALERT | ERR_ABORT; @@ -889,6 +896,15 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) goto out; } + if (!parse_addr && bind_addr) { + /* local peer declared using "server": has name but no + * address: we use the known "bind" line addr settings + * as implicit server's addr and port. 
+ */ + curpeers->peers_fe->srv->addr = *bind_addr; + curpeers->peers_fe->srv->svc_port = get_host_port(bind_addr); + } + if (nb_shards && curpeers->peers_fe->srv->shard > nb_shards) { ha_warning("parsing [%s:%d] : '%s %s' : %d peer shard greater value than %d shards value is ignored.\n", file, linenum, args[0], args[1], curpeers->peers_fe->srv->shard, nb_shards); @@ -902,16 +918,6 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) err_code |= ERR_WARN; } - /* If the peer address has just been parsed, let's copy it to <newpeer> - * and initializes ->proto. - */ - if (peer || !local_peer) { - newpeer->addr = curpeers->peers_fe->srv->addr; - newpeer->proto = protocol_lookup(newpeer->addr.ss_family, PROTO_TYPE_STREAM, 0); - } - - newpeer->xprt = xprt_get(XPRT_RAW); - newpeer->sock_init_arg = NULL; HA_SPIN_INIT(&newpeer->lock); newpeer->srv = curpeers->peers_fe->srv; @@ -2699,7 +2705,6 @@ static int numa_detect_topology() int check_config_validity() { int cfgerr = 0; - struct proxy *curproxy = NULL; struct proxy *init_proxies_list = NULL; struct stktable *t; struct server *newsrv = NULL; @@ -2725,6 +2730,13 @@ int check_config_validity() if (!global.tune.requri_len) global.tune.requri_len = REQURI_LEN; + if (!global.thread_limit) + global.thread_limit = MAX_THREADS; + +#if defined(USE_THREAD) + if (thread_cpus_enabled_at_boot > global.thread_limit) + thread_cpus_enabled_at_boot = global.thread_limit; +#endif if (!global.nbthread) { /* nbthread not set, thus automatic. In this case, and only if * running on a single process, we enable the same number of @@ -2748,13 +2760,24 @@ int check_config_validity() global.nbtgroups = 1; if (global.nbthread > MAX_THREADS_PER_GROUP * global.nbtgroups) { - ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group). 
Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n", - global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP); + if (global.nbthread <= global.thread_limit) + ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group). " + "Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n", + global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP); global.nbthread = MAX_THREADS_PER_GROUP * global.nbtgroups; } + + if (global.nbthread > global.thread_limit) + global.nbthread = global.thread_limit; } #endif } + else if (global.nbthread > global.thread_limit) { + ha_warning("nbthread forced to a higher value (%d) than the configured thread-hard-limit (%d), enforcing the limit. " + "Please fix either value to remove this warning.\n", + global.nbthread, global.thread_limit); + global.nbthread = global.thread_limit; + } if (!global.nbtgroups) global.nbtgroups = 1; @@ -2879,8 +2902,7 @@ init_proxies_list_stage1: #ifdef USE_OPENSSL /* no-alpn ? 
If so, it's the right moment to remove it */ if (bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.alpn_len) { - free(bind_conf->ssl_conf.alpn_str); - bind_conf->ssl_conf.alpn_str = NULL; + ha_free(&bind_conf->ssl_conf.alpn_str); } #ifdef TLSEXT_TYPE_application_layer_protocol_negotiation else if (!bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.npn_str && @@ -2934,6 +2956,12 @@ init_proxies_list_stage1: if (err_code & ERR_FATAL) goto out; } + + if (bind_generate_guid(bind_conf)) { + cfgerr++; + err_code |= ERR_FATAL | ERR_ALERT; + goto out; + } } switch (curproxy->mode) { @@ -3120,6 +3148,12 @@ init_proxies_list_stage1: curproxy->id); err_code |= ERR_WARN; } + if (target->mode == PR_MODE_HTTP) { + /* at least one of the used backends will provoke an + * HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; + } } } @@ -3135,7 +3169,7 @@ init_proxies_list_stage1: * parsing is cancelled and be.name is restored to be resolved. */ pxname = rule->be.name; - LIST_INIT(&rule->be.expr); + lf_expr_init(&rule->be.expr); curproxy->conf.args.ctx = ARGC_UBK; curproxy->conf.args.file = rule->file; curproxy->conf.args.line = rule->line; @@ -3147,20 +3181,23 @@ init_proxies_list_stage1: cfgerr++; continue; } - node = LIST_NEXT(&rule->be.expr, struct logformat_node *, list); + node = LIST_NEXT(&rule->be.expr.nodes.list, struct logformat_node *, list); - if (!LIST_ISEMPTY(&rule->be.expr)) { - if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr) { + if (!lf_expr_isempty(&rule->be.expr)) { + if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr.nodes.list) { rule->dynamic = 1; free(pxname); + /* backend is not yet known so we cannot assume its type, + * thus we should consider that at least one of the used + * backends may provoke HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; continue; } /* Only one element in the list, a simple string: free the expression and * fall back to static rule */ - LIST_DELETE(&node->list); - free(node->arg); - 
free(node); + lf_expr_deinit(&rule->be.expr); } rule->dynamic = 0; @@ -3187,6 +3224,12 @@ init_proxies_list_stage1: } else { ha_free(&rule->be.name); rule->be.backend = target; + if (target->mode == PR_MODE_HTTP) { + /* at least one of the used backends will provoke an + * HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; + } } err_code |= warnif_tcp_http_cond(curproxy, rule->cond); } @@ -3202,7 +3245,7 @@ init_proxies_list_stage1: * to a static rule, thus the parsing is cancelled and we fall back to setting srv.ptr. */ server_name = srule->srv.name; - LIST_INIT(&srule->expr); + lf_expr_init(&srule->expr); curproxy->conf.args.ctx = ARGC_USRV; err = NULL; if (!parse_logformat_string(server_name, curproxy, &srule->expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) { @@ -3212,10 +3255,10 @@ init_proxies_list_stage1: cfgerr++; continue; } - node = LIST_NEXT(&srule->expr, struct logformat_node *, list); + node = LIST_NEXT(&srule->expr.nodes.list, struct logformat_node *, list); - if (!LIST_ISEMPTY(&srule->expr)) { - if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr) { + if (!lf_expr_isempty(&srule->expr)) { + if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr.nodes.list) { srule->dynamic = 1; free(server_name); continue; @@ -3223,9 +3266,7 @@ init_proxies_list_stage1: /* Only one element in the list, a simple string: free the expression and * fall back to static rule */ - LIST_DELETE(&node->list); - free(node->arg); - free(node); + lf_expr_deinit(&srule->expr); } srule->dynamic = 0; @@ -3335,7 +3376,7 @@ init_proxies_list_stage1: } } - if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_CONVDONE) && + if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_F_CONVDONE) && !LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) && (curproxy->uri_auth->userlist || curproxy->uri_auth->auth_realm )) { ha_alert("%s '%s': stats 'auth'/'realm' and 'http-request' can't be used at the same time.\n", @@ -3345,7 +3386,7 @@ init_proxies_list_stage1: } if 
(curproxy->uri_auth && curproxy->uri_auth->userlist && - (!(curproxy->uri_auth->flags & STAT_CONVDONE) || + (!(curproxy->uri_auth->flags & STAT_F_CONVDONE) || LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules))) { const char *uri_auth_compat_req[10]; struct act_rule *rule; @@ -3376,16 +3417,16 @@ init_proxies_list_stage1: if (curproxy->uri_auth->auth_realm) { ha_free(&curproxy->uri_auth->auth_realm); } - curproxy->uri_auth->flags |= STAT_CONVDONE; + curproxy->uri_auth->flags |= STAT_F_CONVDONE; } out_uri_auth_compat: /* check whether we have a logger that uses RFC5424 log format */ list_for_each_entry(tmplogger, &curproxy->loggers, list) { if (tmplogger->format == LOG_FORMAT_RFC5424) { - if (!curproxy->conf.logformat_sd_string) { + if (!curproxy->logformat_sd.str) { /* set the default logformat_sd_string */ - curproxy->conf.logformat_sd_string = default_rfc5424_sd_log_format; + curproxy->logformat_sd.str = default_rfc5424_sd_log_format; } break; } @@ -3393,31 +3434,21 @@ out_uri_auth_compat: /* compile the log format */ if (!(curproxy->cap & PR_CAP_FE)) { - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = NULL; - ha_free(&curproxy->conf.lfs_file); - curproxy->conf.lfs_line = 0; - - if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(curproxy->conf.logformat_sd_string); - curproxy->conf.logformat_sd_string = NULL; - ha_free(&curproxy->conf.lfsd_file); - curproxy->conf.lfsd_line = 0; - } - - if (curproxy->conf.logformat_string) { + lf_expr_deinit(&curproxy->logformat); + lf_expr_deinit(&curproxy->logformat_sd); + } + + if (curproxy->logformat.str) { curproxy->conf.args.ctx = ARGC_LOG; - curproxy->conf.args.file = curproxy->conf.lfs_file; - curproxy->conf.args.line = curproxy->conf.lfs_line; + curproxy->conf.args.file = 
curproxy->logformat.conf.file; + curproxy->conf.args.line = curproxy->logformat.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat, + if (!lf_expr_compile(&curproxy->logformat, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !lf_expr_postcheck(&curproxy->logformat, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse log-format : %s.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, err); + curproxy->logformat.conf.file, curproxy->logformat.conf.line, err); free(err); cfgerr++; } @@ -3425,21 +3456,18 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.logformat_sd_string) { + if (curproxy->logformat_sd.str) { curproxy->conf.args.ctx = ARGC_LOGSD; - curproxy->conf.args.file = curproxy->conf.lfsd_file; - curproxy->conf.args.line = curproxy->conf.lfsd_line; + curproxy->conf.args.file = curproxy->logformat_sd.conf.file; + curproxy->conf.args.line = curproxy->logformat_sd.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.logformat_sd_string, curproxy, &curproxy->logformat_sd, + if (!lf_expr_compile(&curproxy->logformat_sd, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { - ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n", - curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err); - free(err); - cfgerr++; - } else if (!add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err) || + !lf_expr_postcheck(&curproxy->logformat_sd, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n", - curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err); + curproxy->logformat_sd.conf.file, curproxy->logformat_sd.conf.line, err); free(err); cfgerr++; } @@ -3447,21 
+3475,22 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.uniqueid_format_string) { + if (curproxy->format_unique_id.str) { int where = 0; curproxy->conf.args.ctx = ARGC_UIF; - curproxy->conf.args.file = curproxy->conf.uif_file; - curproxy->conf.args.line = curproxy->conf.uif_line; + curproxy->conf.args.file = curproxy->format_unique_id.conf.file; + curproxy->conf.args.line = curproxy->format_unique_id.conf.line; err = NULL; if (curproxy->cap & PR_CAP_FE) where |= SMP_VAL_FE_HRQ_HDR; if (curproxy->cap & PR_CAP_BE) where |= SMP_VAL_BE_HRQ_HDR; - if (!parse_logformat_string(curproxy->conf.uniqueid_format_string, curproxy, &curproxy->format_unique_id, - LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err)) { + if (!lf_expr_compile(&curproxy->format_unique_id, &curproxy->conf.args, + LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err) || + !lf_expr_postcheck(&curproxy->format_unique_id, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse unique-id : %s.\n", - curproxy->conf.uif_file, curproxy->conf.uif_line, err); + curproxy->format_unique_id.conf.file, curproxy->format_unique_id.conf.line, err); free(err); cfgerr++; } @@ -3469,16 +3498,17 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.error_logformat_string) { + if (curproxy->logformat_error.str) { curproxy->conf.args.ctx = ARGC_LOG; - curproxy->conf.args.file = curproxy->conf.elfs_file; - curproxy->conf.args.line = curproxy->conf.elfs_line; + curproxy->conf.args.file = curproxy->logformat_error.conf.file; + curproxy->conf.args.line = curproxy->logformat_error.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.error_logformat_string, curproxy, &curproxy->logformat_error, + if (!lf_expr_compile(&curproxy->logformat_error, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !lf_expr_postcheck(&curproxy->logformat_error, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed 
to parse error-log-format : %s.\n", - curproxy->conf.elfs_file, curproxy->conf.elfs_line, err); + curproxy->logformat_error.conf.file, curproxy->logformat_error.conf.line, err); free(err); cfgerr++; } @@ -3655,8 +3685,6 @@ out_uri_auth_compat: newsrv->conf.id.key = newsrv->puid = next_id; eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id); } - newsrv->conf.name.key = newsrv->id; - ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); next_id++; newsrv = newsrv->next; @@ -3723,12 +3751,6 @@ out_uri_auth_compat: * on what LB algorithm was chosen. */ - if (curproxy->mode == PR_MODE_SYSLOG) { - /* log load-balancing requires special init that is performed - * during log-postparsing step - */ - goto skip_server_lb_init; - } curproxy->lbprm.algo &= ~(BE_LB_LKUP | BE_LB_PROP_DYN); switch (curproxy->lbprm.algo & BE_LB_KIND) { case BE_LB_KIND_RR: @@ -3767,8 +3789,13 @@ out_uri_auth_compat: init_server_map(curproxy); } break; + case BE_LB_KIND_SA: + if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_SA_SS) { + curproxy->lbprm.algo |= BE_LB_PROP_DYN; + init_server_ss(curproxy); + } + break; } - skip_server_lb_init: HA_RWLOCK_INIT(&curproxy->lbprm.lock); if (curproxy->options & PR_O_LOGASAP) @@ -3776,7 +3803,7 @@ out_uri_auth_compat: if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) && (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->loggers) && - (!LIST_ISEMPTY(&curproxy->logformat) || !LIST_ISEMPTY(&curproxy->logformat_sd))) { + (!lf_expr_isempty(&curproxy->logformat) || !lf_expr_isempty(&curproxy->logformat_sd))) { ha_warning("log format ignored for %s '%s' since it has no log address.\n", proxy_type_str(curproxy), curproxy->id); err_code |= ERR_WARN; @@ -3798,6 +3825,12 @@ out_uri_auth_compat: err_code |= ERR_WARN; } + if (isttest(curproxy->monitor_uri)) { + ha_warning("'monitor-uri' statement ignored for %s '%s' as it requires HTTP mode.\n", + proxy_type_str(curproxy), curproxy->id); + 
err_code |= ERR_WARN; + } + if (!LIST_ISEMPTY(&curproxy->http_req_rules)) { ha_warning("'http-request' rules ignored for %s '%s' as they require HTTP mode.\n", proxy_type_str(curproxy), curproxy->id); @@ -4181,6 +4214,11 @@ init_proxies_list_stage2: /* listener ID not set, use automatic numbering with first * spare entry starting with next_luid. */ + if (listener->by_fe.p != &curproxy->conf.listeners) { + struct listener *prev_li = LIST_PREV(&listener->by_fe, typeof(prev_li), by_fe); + if (prev_li->luid) + next_id = prev_li->luid + 1; + } next_id = get_next_id(&curproxy->conf.used_listener_id, next_id); listener->conf.id.key = listener->luid = next_id; eb32_insert(&curproxy->conf.used_listener_id, &listener->conf.id); diff --git a/src/check.c b/src/check.c index 2753c93..64464c4 100644 --- a/src/check.c +++ b/src/check.c @@ -1031,9 +1031,9 @@ int httpchk_build_status_header(struct server *s, struct buffer *buf) s->queue.length); if ((s->cur_state == SRV_ST_STARTING) && - ns_to_sec(now_ns) < s->last_change + s->slowstart && - ns_to_sec(now_ns) >= s->last_change) { - ratio = MAX(1, 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart); + ns_to_sec(now_ns) < s->counters.last_change + s->slowstart && + ns_to_sec(now_ns) >= s->counters.last_change) { + ratio = MAX(1, 100 * (ns_to_sec(now_ns) - s->counters.last_change) / s->slowstart); chunk_appendf(buf, "; throttle=%d%%", ratio); } @@ -1382,7 +1382,7 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state) * as a failed response coupled with "observe layer7" caused the * server state to be suddenly changed. 
*/ - sc_conn_drain_and_shut(sc); + se_shutdown(sc->sedesc, SE_SHR_DRAIN|SE_SHW_SILENT); } if (sc) { @@ -1415,8 +1415,7 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state) } } - if (LIST_INLIST(&check->buf_wait.list)) - LIST_DEL_INIT(&check->buf_wait.list); + b_dequeue(&check->buf_wait); check_release_buf(check, &check->bi); check_release_buf(check, &check->bo); @@ -1505,13 +1504,13 @@ int check_buf_available(void *target) BUG_ON(!check->sc); - if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) { + if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi, DB_CHANNEL)) { TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check); check->state &= ~CHK_ST_IN_ALLOC; tasklet_wakeup(check->sc->wait_event.tasklet); return 1; } - if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) { + if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo, DB_CHANNEL)) { TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check); check->state &= ~CHK_ST_OUT_ALLOC; tasklet_wakeup(check->sc->wait_event.tasklet); @@ -1529,10 +1528,8 @@ struct buffer *check_get_buf(struct check *check, struct buffer *bptr) struct buffer *buf = NULL; if (likely(!LIST_INLIST(&check->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - check->buf_wait.target = check; - check->buf_wait.wakeup_cb = check_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &check->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_CHANNEL)) == NULL)) { + b_queue(DB_CHANNEL, &check->buf_wait, check, check_buf_available); } return buf; } @@ -455,7 +455,7 @@ static struct proxy *cli_alloc_fe(const char *name, const char *file, int line) init_new_proxy(fe); fe->next = proxies_list; proxies_list = fe; - fe->last_change = ns_to_sec(now_ns); + fe->fe_counters.last_change = ns_to_sec(now_ns); fe->id = strdup("GLOBAL"); fe->cap = PR_CAP_FE|PR_CAP_INT; fe->maxconn = 10; /* default to 10 concurrent 
connections */ @@ -742,9 +742,8 @@ static int cli_parse_request(struct appctx *appctx) int i = 0; struct cli_kw *kw; - p = appctx->chunk->area; - end = p + appctx->chunk->data; - + p = b_head(&appctx->inbuf); + end = b_tail(&appctx->inbuf); /* * Get pointers on words. * One extra slot is reserved to store a pointer on a null byte. @@ -806,29 +805,13 @@ static int cli_parse_request(struct appctx *appctx) i++; } /* fill unused slots */ - p = appctx->chunk->area + appctx->chunk->data; + p = b_tail(&appctx->inbuf); for (; i < MAX_CLI_ARGS + 1; i++) args[i] = p; if (!**args) return 0; - if (appctx->st1 & APPCTX_CLI_ST1_SHUT_EXPECTED) { - /* The previous command line was finished by a \n in non-interactive mode. - * It should not be followed by another command line. In non-interactive mode, - * only one line should be processed. Because of a bug, it is not respected. - * So emit a warning, only once in the process life, to warn users their script - * must be updated. - */ - appctx->st1 &= ~APPCTX_CLI_ST1_SHUT_EXPECTED; - if (ONLY_ONCE()) { - ha_warning("Commands sent to the CLI were chained using a new line character while in non-interactive mode." - " This is not reliable, not officially supported and will not be supported anymore in future versions. " - "Please use ';' to delimit commands instead."); - } - } - - kw = cli_find_kw(args); if (!kw || (kw->level & ~appctx->cli_level & ACCESS_MASTER_ONLY) || @@ -916,6 +899,151 @@ static int cli_output_msg(struct appctx *appctx, const char *msg, int severity, return applet_putchk(appctx, tmp); } +int cli_init(struct appctx *appctx) +{ + struct stconn *sc = appctx_sc(appctx); + struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf; + + appctx->cli_severity_output = bind_conf->severity_output; + applet_reset_svcctx(appctx); + appctx->st0 = CLI_ST_GETREQ; + appctx->cli_level = bind_conf->level; + + /* Wakeup the applet ASAP. 
*/ + applet_need_more_data(appctx); + return 0; + +} + +size_t cli_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned flags) +{ + char *str; + size_t len, ret = 0; + int lf = 0; + + if (appctx->st0 == CLI_ST_INIT) + cli_init(appctx); + else if (appctx->st0 != CLI_ST_GETREQ) + goto end; + + if (b_space_wraps(&appctx->inbuf)) + b_slow_realign(&appctx->inbuf, trash.area, b_data(&appctx->inbuf)); + + while (1) { + /* payload doesn't take escapes nor does it end on semi-colons, + * so we use the regular getline. Normal mode however must stop + * on LFs and semi-colons that are not prefixed by a backslash. + * Note we reserve one byte at the end to insert a trailing nul + * byte. + */ + str = b_tail(&appctx->inbuf); + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) + len = b_getdelim(buf, ret, count, str, b_room(&appctx->inbuf) - 1, "\n;", '\\'); + else + len = b_getline(buf, ret, count, str, b_room(&appctx->inbuf) - 1); + + if (!len) { + if (!b_room(buf) || (count > b_room(&appctx->inbuf) - 1)) { + cli_err(appctx, "The command is too big for the buffer size. Please change tune.bufsize in the configuration to use a bigger command.\n"); + applet_set_error(appctx); + b_reset(&appctx->inbuf); + } + else if (flags & CO_SFL_LAST_DATA) { + applet_set_eos(appctx); + applet_set_error(appctx); + b_reset(&appctx->inbuf); + } + break; + } + + ret += len; + count -= len; + + if (str[len-1] == '\n') + lf = 1; + + /* Remove the trailing \r, if any and add a null byte at the + * end. For normal mode, the trailing \n is removed, but we + * conserve if for payload mode. 
+ */ + len--; + if (len && str[len-1] == '\r') + len--; + if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { + str[len+1] = '\0'; + b_add(&appctx->inbuf, len+1); + } + else { + str[len] = '\0'; + b_add(&appctx->inbuf, len); + } + + if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { + /* look for a pattern */ + if (len == strlen(appctx->cli_payload_pat)) { + /* here use 'len' because str still contains the \n */ + if (strncmp(str, appctx->cli_payload_pat, len) == 0) { + /* remove the last two \n */ + b_sub(&appctx->inbuf, strlen(appctx->cli_payload_pat) + 2); + *b_tail(&appctx->inbuf) = '\0'; + appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD; + if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) + appctx->st1 |= APPCTX_CLI_ST1_LASTCMD; + } + } + } + else { + char *last_arg; + + /* + * Look for the "payload start" pattern at the end of a + * line Its location is not remembered here, this is + * just to switch to a gathering mode. + * + * The pattern must start by << followed by 0 to 7 + * characters, and finished by the end of the command + * (\n or ;). 
+ */ + + /* look for the first space starting by the end of the line */ + for (last_arg = b_tail(&appctx->inbuf); last_arg != b_head(&appctx->inbuf); last_arg--) { + if (*last_arg == ' ' || *last_arg == '\t') { + last_arg++; + break; + } + } + + if (strncmp(last_arg, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) { + ssize_t pat_len = strlen(last_arg + strlen(PAYLOAD_PATTERN)); + + /* A customized pattern can't be more than 7 characters + * if it's more, don't make it a payload + */ + if (pat_len < sizeof(appctx->cli_payload_pat)) { + appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD; + /* copy the customized pattern, don't store the << */ + strncpy(appctx->cli_payload_pat, last_arg + strlen(PAYLOAD_PATTERN), sizeof(appctx->cli_payload_pat)-1); + appctx->cli_payload_pat[sizeof(appctx->cli_payload_pat)-1] = '\0'; + b_add(&appctx->inbuf, 1); // keep the trailing \0 after the pattern + } + } + else { + if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) + appctx->st1 |= APPCTX_CLI_ST1_LASTCMD; + } + } + + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) || (appctx->st1 & APPCTX_CLI_ST1_PROMPT)) { + appctx->st0 = CLI_ST_PARSEREQ; + break; + } + } + b_del(buf, ret); + + end: + return ret; +} + /* This I/O handler runs as an applet embedded in a stream connector. It is * used to processes I/O from/to the stats unix socket. The system relies on a * state machine handling requests and various responses. 
We read a request, @@ -926,181 +1054,62 @@ static int cli_output_msg(struct appctx *appctx, const char *msg, int severity, */ static void cli_io_handler(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf; - int reql; - int len; - int lf = 0; + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL)) + goto out; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { - co_skip(sc_oc(sc), co_data(sc_oc(sc))); + if (!appctx_get_buf(appctx, &appctx->outbuf)) { goto out; } - /* Check if the input buffer is available. */ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); + if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) { + appctx->st0 = CLI_ST_END; goto out; } while (1) { if (appctx->st0 == CLI_ST_INIT) { /* reset severity to default at init */ - appctx->cli_severity_output = bind_conf->severity_output; - applet_reset_svcctx(appctx); - appctx->st0 = CLI_ST_GETREQ; - appctx->cli_level = bind_conf->level; + cli_init(appctx); + break; } else if (appctx->st0 == CLI_ST_END) { - se_fl_set(appctx->sedesc, SE_FL_EOS); - free_trash_chunk(appctx->chunk); - appctx->chunk = NULL; + applet_set_eos(appctx); break; } else if (appctx->st0 == CLI_ST_GETREQ) { - char *str; - - /* use a trash chunk to store received data */ - if (!appctx->chunk) { - appctx->chunk = alloc_trash_chunk(); - if (!appctx->chunk) { - se_fl_set(appctx->sedesc, SE_FL_ERROR); - appctx->st0 = CLI_ST_END; - continue; - } - } - - str = appctx->chunk->area + appctx->chunk->data; - - /* ensure we have some output room left in the event we - * would want to return some info right after parsing. - */ - if (buffer_almost_full(sc_ib(sc))) { - sc_need_room(sc, b_size(&res->buf) / 2); - break; - } - - /* payload doesn't take escapes nor does it end on semi-colons, so - * we use the regular getline. 
Normal mode however must stop on - * LFs and semi-colons that are not prefixed by a backslash. Note - * that we reserve one byte at the end to insert a trailing nul byte. + /* Now we close the output if we're not in interactive + * mode and the request buffer is empty. This still + * allows pipelined requests to be sent in + * non-interactive mode. */ - - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) - reql = co_getline(sc_oc(sc), str, - appctx->chunk->size - appctx->chunk->data - 1); - else - reql = co_getdelim(sc_oc(sc), str, - appctx->chunk->size - appctx->chunk->data - 1, - "\n;", '\\'); - - if (reql <= 0) { /* closed or EOL not found */ - if (reql == 0) - break; - se_fl_set(appctx->sedesc, SE_FL_ERROR); + if (se_fl_test(appctx->sedesc, SE_FL_SHW)) { appctx->st0 = CLI_ST_END; continue; } - - if (str[reql-1] == '\n') - lf = 1; - - /* now it is time to check that we have a full line, - * remove the trailing \n and possibly \r, then cut the - * line. + break; + } + else if (appctx->st0 == CLI_ST_PARSEREQ) { + /* ensure we have some output room left in the event we + * would want to return some info right after parsing. 
*/ - len = reql - 1; - if (str[len] != '\n' && str[len] != ';') { - se_fl_set(appctx->sedesc, SE_FL_ERROR); - appctx->st0 = CLI_ST_END; - continue; - } - - if (len && str[len-1] == '\r') - len--; - - str[len] = '\0'; - appctx->chunk->data += len; - - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { - appctx->chunk->area[appctx->chunk->data] = '\n'; - appctx->chunk->area[appctx->chunk->data + 1] = 0; - appctx->chunk->data++; + if (buffer_almost_full(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + break; } + appctx->t->expire = TICK_ETERNITY; appctx->st0 = CLI_ST_PROMPT; - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { - /* look for a pattern */ - if (len == strlen(appctx->cli_payload_pat)) { - /* here use 'len' because str still contains the \n */ - if (strncmp(str, appctx->cli_payload_pat, len) == 0) { - /* remove the last two \n */ - appctx->chunk->data -= strlen(appctx->cli_payload_pat) + 2; - appctx->chunk->area[appctx->chunk->data] = 0; - cli_parse_request(appctx); - chunk_reset(appctx->chunk); - /* NB: cli_sock_parse_request() may have put - * another CLI_ST_O_* into appctx->st0. - */ - - appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD; - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) - appctx->st1 |= APPCTX_CLI_ST1_SHUT_EXPECTED; - } - } - } - else { - char *last_arg; - /* - * Look for the "payload start" pattern at the end of a line - * Its location is not remembered here, this is just to switch - * to a gathering mode. - * The pattern must start by << followed by 0 - * to 7 characters, and finished by the end of - * the command (\n or ;). 
- */ - /* look for the first space starting by the end of the line */ - for (last_arg = appctx->chunk->area + appctx->chunk->data; last_arg != appctx->chunk->area; last_arg--) { - if (*last_arg == ' ' || *last_arg == '\t') { - last_arg++; - break; - } - } - if (strncmp(last_arg, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) { - ssize_t pat_len = strlen(last_arg + strlen(PAYLOAD_PATTERN)); - - /* A customized pattern can't be more than 7 characters - * if it's more, don't make it a payload - */ - if (pat_len < sizeof(appctx->cli_payload_pat)) { - appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD; - /* copy the customized pattern, don't store the << */ - strncpy(appctx->cli_payload_pat, last_arg + strlen(PAYLOAD_PATTERN), sizeof(appctx->cli_payload_pat)-1); - appctx->cli_payload_pat[sizeof(appctx->cli_payload_pat)-1] = '\0'; - appctx->chunk->data++; // keep the trailing \0 after the pattern - } - } - else { - /* no payload, the command is complete: parse the request */ - cli_parse_request(appctx); - chunk_reset(appctx->chunk); - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) - appctx->st1 |= APPCTX_CLI_ST1_SHUT_EXPECTED; - } + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) { + cli_parse_request(appctx); + b_reset(&appctx->inbuf); } - - /* re-adjust req buffer */ - co_skip(sc_oc(sc), reql); - sc_opposite(sc)->flags |= SC_FL_RCV_ONCE; /* we plan to read small requests */ } else { /* output functions */ struct cli_print_ctx *ctx; const char *msg; int sev; - + cli_output: switch (appctx->st0) { case CLI_ST_PROMPT: break; @@ -1146,17 +1155,28 @@ static void cli_io_handler(struct appctx *appctx) appctx->st0 == CLI_ST_PRINT_UMSGERR) { usermsgs_clr(NULL); } + appctx->t->expire = TICK_ETERNITY; appctx->st0 = CLI_ST_PROMPT; } + if (applet_fl_test(appctx, APPCTX_FL_ERR_PENDING)) { + appctx->st0 = CLI_ST_END; + continue; + } + break; case CLI_ST_CALLBACK: /* use custom pointer */ if (appctx->io_handler) if (appctx->io_handler(appctx)) { + appctx->t->expire = TICK_ETERNITY; appctx->st0 = 
CLI_ST_PROMPT; if (appctx->io_release) { appctx->io_release(appctx); appctx->io_release = NULL; + /* some release handlers might have + * pending output to print. + */ + continue; } } break; @@ -1175,7 +1195,7 @@ static void cli_io_handler(struct appctx *appctx) * when entering a payload with interactive mode, change the prompt * to emphasize that more data can still be sent */ - if (appctx->chunk->data && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) + if (b_data(&appctx->inbuf) && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) prompt = "+ "; else if (appctx->st1 & APPCTX_CLI_ST1_TIMED) { uint up = ns_to_sec(now_ns - start_time_ns); @@ -1209,8 +1229,8 @@ static void cli_io_handler(struct appctx *appctx) * allows pipelined requests to be sent in * non-interactive mode. */ - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && !co_data(req) && (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD))) { - se_fl_set(appctx->sedesc, SE_FL_EOI); + if ((appctx->st1 & (APPCTX_CLI_ST1_PROMPT|APPCTX_CLI_ST1_PAYLOAD|APPCTX_CLI_ST1_LASTCMD)) == APPCTX_CLI_ST1_LASTCMD) { + applet_set_eoi(appctx); appctx->st0 = CLI_ST_END; continue; } @@ -1230,14 +1250,16 @@ static void cli_io_handler(struct appctx *appctx) * refills the buffer with new bytes in non-interactive * mode, avoiding to close on apparently empty commands. 
*/ - if (co_data(sc_oc(sc))) { - appctx_wakeup(appctx); - goto out; - } + break; } } out: + if (appctx->st0 == CLI_ST_END) { + /* eat the whole request */ + b_reset(&appctx->inbuf); + applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL); + } return; } @@ -1247,9 +1269,6 @@ static void cli_io_handler(struct appctx *appctx) */ static void cli_release_handler(struct appctx *appctx) { - free_trash_chunk(appctx->chunk); - appctx->chunk = NULL; - if (appctx->io_release) { appctx->io_release(appctx); appctx->io_release = NULL; @@ -1272,13 +1291,8 @@ static void cli_release_handler(struct appctx *appctx) static int cli_io_handler_show_env(struct appctx *appctx) { struct show_env_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); char **var = ctx->var; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); /* we have two inner loops here, one for the proxy, the other one for @@ -1308,16 +1322,11 @@ static int cli_io_handler_show_env(struct appctx *appctx) */ static int cli_io_handler_show_fd(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct show_fd_ctx *fdctx = appctx->svcctx; uint match = fdctx->show_mask; int fd = fdctx->fd; int ret = 1; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - chunk_reset(&trash); /* isolate the threads once per round. We're limited to a buffer worth @@ -2010,6 +2019,174 @@ static int cli_parse_set_ratelimit(char **args, char *payload, struct appctx *ap return 1; } +/* Parse a "wait <time>" command. + * It uses a "cli_wait_ctx" struct for its context. + * Returns 0 if the server deletion has been successfully scheduled, 1 on failure. 
+ */ +static int cli_parse_wait(char **args, char *payload, struct appctx *appctx, void *private) +{ + struct cli_wait_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + uint wait_ms; + const char *err; + + if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) + return 1; + + if (!*args[1]) + return cli_err(appctx, "Expects a duration in milliseconds.\n"); + + err = parse_time_err(args[1], &wait_ms, TIME_UNIT_MS); + if (err || wait_ms < 1) { + /* in case -h is passed as the first option, continue to the next test */ + if (strcmp(args[1], "-h") == 0) + args--; + else + return cli_err(appctx, "Invalid duration.\n"); + } + + if (strcmp(args[2], "srv-removable") == 0) { + struct ist be_name, sv_name; + + if (!*args[3]) + return cli_err(appctx, "Missing server name (<backend>/<server>).\n"); + + sv_name = ist(args[3]); + be_name = istsplit(&sv_name, '/'); + if (!istlen(sv_name)) + return cli_err(appctx, "Require 'backend/server'.\n"); + + be_name = istdup(be_name); + sv_name = istdup(sv_name); + if (!isttest(be_name) || !isttest(sv_name)) { + free(istptr(be_name)); + free(istptr(sv_name)); + return cli_err(appctx, "Out of memory trying to clone the server name.\n"); + } + + ctx->args[0] = ist0(be_name); + ctx->args[1] = ist0(sv_name); + ctx->cond = CLI_WAIT_COND_SRV_UNUSED; + } + else if (*args[2]) { + /* show the command's help either upon request (-h) or error */ + err = "Usage: wait {-h|<duration>} [condition [args...]]\n" + " - '-h' displays this help\n" + " - <duration> is the maximum wait time, optionally suffixed by the unit among\n" + " 'us', 'ms', 's', 'm', 'h', and 'd'. ; the default unit is milliseconds.\n" + " - <condition> indicates what to wait for, no longer than the specified\n" + " duration. 
Supported conditions are:\n" + " - <none> : by default, just sleep for the specified duration.\n" + " - srv-removable <px>/<sv> : wait for this server to become removable.\n" + ""; + + if (strcmp(args[2], "-h") == 0) + return cli_msg(appctx, LOG_INFO, err); + else + return cli_err(appctx, err); + } + + ctx->start = now_ms; + ctx->deadline = tick_add(now_ms, wait_ms); + + /* proceed with the I/O handler */ + return 0; +} + +/* Execute a "wait" condition. The delay is exponentially incremented between + * now_ms and ctx->deadline in powers of 1.5 and with a bound set to 10% of the + * programmed wait time, so that in a few wakeups we can later check a condition + * with reasonable accuracy. Shutdowns and other errors are handled as well and + * terminate the operation, but not new inputs so that it remains possible to + * chain other commands after it. Returns 0 if not finished, 1 if finished. + */ +static int cli_io_handler_wait(struct appctx *appctx) +{ + struct cli_wait_ctx *ctx = appctx->svcctx; + uint total, elapsed, left, wait; + int ret; + + /* note: upon first invocation, the timeout is not set */ + if (tick_isset(appctx->t->expire) && + !tick_is_expired(appctx->t->expire, now_ms)) + goto wait; + + /* here we should evaluate our waiting conditions, if any */ + + if (ctx->cond == CLI_WAIT_COND_SRV_UNUSED) { + /* check if the server in args[0]/args[1] can be released now */ + thread_isolate(); + ret = srv_check_for_deletion(ctx->args[0], ctx->args[1], NULL, NULL, NULL); + thread_release(); + + if (ret < 0) { + /* unrecoverable failure */ + ctx->error = CLI_WAIT_ERR_FAIL; + return 1; + } else if (ret > 0) { + /* immediate success */ + ctx->error = CLI_WAIT_ERR_DONE; + return 1; + } + /* let's check the timer */ + } + + /* and here we recalculate the new wait time or abort */ + left = tick_remain(now_ms, ctx->deadline); + if (!left) { + /* let the release handler know we've expired. 
When there is no + * wait condition, it's a simple sleep so we declare we're done. + */ + if (ctx->cond == CLI_WAIT_COND_NONE) + ctx->error = CLI_WAIT_ERR_DONE; + else + ctx->error = CLI_WAIT_ERR_EXP; + return 1; + } + + total = tick_remain(ctx->start, ctx->deadline); + elapsed = total - left; + wait = elapsed / 2 + 1; + if (wait > left) + wait = left; + else if (wait > total / 10) + wait = total / 10; + + appctx->t->expire = tick_add(now_ms, wait); + + wait: + /* Stop waiting upon close/abort/error */ + if (unlikely(se_fl_test(appctx->sedesc, SE_FL_SHW)) && !b_data(&appctx->inbuf)) { + ctx->error = CLI_WAIT_ERR_INTR; + return 1; + } + + return 0; +} + + +/* release structs allocated by "delete server" */ +static void cli_release_wait(struct appctx *appctx) +{ + struct cli_wait_ctx *ctx = appctx->svcctx; + const char *msg; + int i; + + switch (ctx->error) { + case CLI_WAIT_ERR_EXP: msg = "Wait delay expired.\n"; break; + case CLI_WAIT_ERR_INTR: msg = "Interrupted.\n"; break; + case CLI_WAIT_ERR_FAIL: msg = ctx->msg ? ctx->msg : "Failed.\n"; break; + default: msg = "Done.\n"; break; + } + + for (i = 0; i < sizeof(ctx->args) / sizeof(ctx->args[0]); i++) + ha_free(&ctx->args[i]); + + if (ctx->error == CLI_WAIT_ERR_DONE) + cli_msg(appctx, LOG_INFO, msg); + else + cli_err(appctx, msg); +} + /* parse the "expose-fd" argument on the bind lines */ static int bind_parse_expose_fd(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { @@ -2471,8 +2648,13 @@ static int pcli_prefix_to_pid(const char *prefix) return -1; } -/* Return:: - * >= 0 : number of words to escape +/* + * pcli_find_and_exec_kw() parses a command for the master CLI. It looks for a + * prefix or a command that is handled directly by the proxy and never sent to + * a worker. 
+ * + * Return: + * >= 0 : number of words that were parsed and need to be skipped * = -1 : error */ int pcli_find_and_exec_kw(struct stream *s, char **args, int argl, char **errmsg, int *next_pid) @@ -2959,7 +3141,7 @@ int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit) pendconn_free(s); /* let's do a final log if we need it */ - if (!LIST_ISEMPTY(&fe->logformat) && s->logs.logwait && + if (!lf_expr_isempty(&fe->logformat) && s->logs.logwait && !(s->flags & SF_MONITOR) && (!(fe->options & PR_O_NULLNOLOG) || s->req.total)) { s->do_log(s); @@ -3366,6 +3548,8 @@ static struct applet cli_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<CLI>", /* used for logging */ .fct = cli_io_handler, + .rcv_buf = appctx_raw_rcv_buf, + .snd_buf = cli_snd_buf, .release = cli_release_handler, }; @@ -3374,6 +3558,8 @@ static struct applet mcli_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<MCLI>", /* used for logging */ .fct = cli_io_handler, + .rcv_buf = appctx_raw_rcv_buf, + .snd_buf = cli_snd_buf, .release = cli_release_handler, }; @@ -3401,6 +3587,7 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "version", NULL }, "show version : show version of the current process", cli_parse_show_version, NULL, NULL, NULL, ACCESS_MASTER }, { { "operator", NULL }, "operator : lower the level of the current CLI session to operator", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER}, { { "user", NULL }, "user : lower the level of the current CLI session to user", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER}, + { { "wait", NULL }, "wait {-h|<delay_ms>} cond [args...] 
: wait the specified delay or condition (-h to see list)", cli_parse_wait, cli_io_handler_wait, cli_release_wait, NULL }, {{},} }}; diff --git a/src/clock.c b/src/clock.c index ec2133c..7734389 100644 --- a/src/clock.c +++ b/src/clock.c @@ -135,7 +135,7 @@ uint64_t now_cpu_time_thread(int thr) /* set the clock source for the local thread */ void clock_set_local_source(void) { -#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0) #ifdef USE_THREAD pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]); #else diff --git a/src/compression.c b/src/compression.c index 7b75461..a4464e0 100644 --- a/src/compression.c +++ b/src/compression.c @@ -300,7 +300,7 @@ static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int * data and need a buffer now. We reuse the same buffer, as it's * not used out of the scope of a series of add_data()*, end(). */ - if (b_alloc(&tmpbuf) == NULL) + if (b_alloc(&tmpbuf, DB_PERMANENT) == NULL) return -1; /* no memory */ b_reset(&tmpbuf); memcpy(b_tail(&tmpbuf), comp_ctx->direct_ptr, comp_ctx->direct_len); diff --git a/src/connection.c b/src/connection.c index ed6beb7..3fedad9 100644 --- a/src/connection.c +++ b/src/connection.c @@ -134,7 +134,7 @@ fail: /* If connection is interrupted without CO_FL_ERROR, receiver task won't free it. 
*/ BUG_ON(!(conn->flags & CO_FL_ERROR)); - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_RES); } return -1; } else @@ -474,7 +474,7 @@ void conn_init(struct connection *conn, void *target) conn->proxy_netns = NULL; MT_LIST_INIT(&conn->toremove_list); if (conn_is_back(conn)) - LIST_INIT(&conn->session_list); + LIST_INIT(&conn->sess_el); else LIST_INIT(&conn->stopping_list); LIST_INIT(&conn->tlv_list); @@ -511,12 +511,12 @@ static int conn_backend_init(struct connection *conn) */ static void conn_backend_deinit(struct connection *conn) { - /* If the connection is owned by the session, remove it from its list - */ - if (conn_is_back(conn) && LIST_INLIST(&conn->session_list)) { + /* If the connection is owned by the session, remove it from its list. */ + if (LIST_INLIST(&conn->sess_el)) session_unown_conn(conn->owner, conn); - } - else if (!(conn->flags & CO_FL_PRIVATE)) { + + /* If the connection is not private, it is accounted by the server. */ + if (!(conn->flags & CO_FL_PRIVATE)) { if (obj_type(conn->target) == OBJ_TYPE_SERVER) srv_release_conn(__objt_server(conn->target), conn); } @@ -603,6 +603,21 @@ void conn_free(struct connection *conn) pool_free(pool_head_connection, conn); } +/* Close all <conn> internal layers accordingly prior to freeing it. 
*/ +void conn_release(struct connection *conn) +{ + if (conn->mux) { + conn->mux->destroy(conn->ctx); + } + else { + conn_stop_tracking(conn); + conn_full_close(conn); + if (conn->destroy_cb) + conn->destroy_cb(conn); + conn_free(conn); + } +} + struct conn_hash_node *conn_alloc_hash_node(struct connection *conn) { struct conn_hash_node *hash_node = NULL; @@ -1114,111 +1129,112 @@ int conn_recv_proxy(struct connection *conn, int flag) break; } - /* TLV parsing */ - while (tlv_offset < total_v2_len) { - struct ist tlv; - struct tlv *tlv_packet = NULL; - struct conn_tlv_list *new_tlv = NULL; - size_t data_len = 0; - - /* Verify that we have at least TLV_HEADER_SIZE bytes left */ - if (tlv_offset + TLV_HEADER_SIZE > total_v2_len) - goto bad_header; + /* unsupported protocol, keep local connection address */ + break; + case 0x00: /* LOCAL command */ + /* keep local connection address for LOCAL */ - tlv_packet = (struct tlv *) &trash.area[tlv_offset]; - tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet)); - tlv_offset += istlen(tlv) + TLV_HEADER_SIZE; + tlv_offset = PP2_HEADER_LEN; + break; + default: + goto bad_header; /* not a supported command */ + } - /* Verify that the TLV length does not exceed the total PROXYv2 length */ - if (tlv_offset > total_v2_len) - goto bad_header; + /* TLV parsing */ + while (tlv_offset < total_v2_len) { + struct ist tlv; + struct tlv *tlv_packet = NULL; + struct conn_tlv_list *new_tlv = NULL; + size_t data_len = 0; - /* Prepare known TLV types */ - switch (tlv_packet->type) { - case PP2_TYPE_CRC32C: { - uint32_t n_crc32c; + /* Verify that we have at least TLV_HEADER_SIZE bytes left */ + if (tlv_offset + TLV_HEADER_SIZE > total_v2_len) + goto bad_header; - /* Verify that this TLV is exactly 4 bytes long */ - if (istlen(tlv) != PP2_CRC32C_LEN) - goto bad_header; + tlv_packet = (struct tlv *) &trash.area[tlv_offset]; + tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet)); + tlv_offset += istlen(tlv) + 
TLV_HEADER_SIZE; - n_crc32c = read_n32(istptr(tlv)); - write_n32(istptr(tlv), 0); // compute with CRC==0 + /* Verify that the TLV length does not exceed the total PROXYv2 length */ + if (tlv_offset > total_v2_len) + goto bad_header; - if (hash_crc32c(trash.area, total_v2_len) != n_crc32c) - goto bad_header; - break; - } -#ifdef USE_NS - case PP2_TYPE_NETNS: { - const struct netns_entry *ns; + /* Prepare known TLV types */ + switch (tlv_packet->type) { + case PP2_TYPE_CRC32C: { + uint32_t n_crc32c; - ns = netns_store_lookup(istptr(tlv), istlen(tlv)); - if (ns) - conn->proxy_netns = ns; - break; - } -#endif - case PP2_TYPE_AUTHORITY: { - /* For now, keep the length restriction by HAProxy */ - if (istlen(tlv) > HA_PP2_AUTHORITY_MAX) - goto bad_header; + /* Verify that this TLV is exactly 4 bytes long */ + if (istlen(tlv) != PP2_CRC32C_LEN) + goto bad_header; - break; - } - case PP2_TYPE_UNIQUE_ID: { - if (istlen(tlv) > UNIQUEID_LEN) - goto bad_header; - break; - } - default: - break; - } + n_crc32c = read_n32(istptr(tlv)); + write_n32(istptr(tlv), 0); // compute with CRC==0 - /* If we did not find a known TLV type that we can optimize for, we generically allocate it */ - data_len = get_tlv_length(tlv_packet); + if (hash_crc32c(trash.area, total_v2_len) != n_crc32c) + goto bad_header; + break; + } +#ifdef USE_NS + case PP2_TYPE_NETNS: { + const struct netns_entry *ns; - /* Prevent attackers from allocating too much memory */ - if (unlikely(data_len > HA_PP2_MAX_ALLOC)) - goto fail; + ns = netns_store_lookup(istptr(tlv), istlen(tlv)); + if (ns) + conn->proxy_netns = ns; + break; + } +#endif + case PP2_TYPE_AUTHORITY: { + /* For now, keep the length restriction by HAProxy */ + if (istlen(tlv) > HA_PP2_AUTHORITY_MAX) + goto bad_header; - /* Alloc memory based on data_len */ - if (data_len > HA_PP2_TLV_VALUE_256) - new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list)); - else if (data_len <= HA_PP2_TLV_VALUE_128) - new_tlv = 
pool_alloc(pool_head_pp_tlv_128); - else - new_tlv = pool_alloc(pool_head_pp_tlv_256); + break; + } + case PP2_TYPE_UNIQUE_ID: { + if (istlen(tlv) > UNIQUEID_LEN) + goto bad_header; + break; + } + default: + break; + } - if (unlikely(!new_tlv)) - goto fail; + /* If we did not find a known TLV type that we can optimize for, we generically allocate it */ + data_len = get_tlv_length(tlv_packet); - new_tlv->type = tlv_packet->type; + /* Prevent attackers from allocating too much memory */ + if (unlikely(data_len > HA_PP2_MAX_ALLOC)) + goto fail; - /* Save TLV to make it accessible via sample fetch */ - memcpy(new_tlv->value, tlv.ptr, data_len); - new_tlv->len = data_len; + /* Alloc memory based on data_len */ + if (data_len > HA_PP2_TLV_VALUE_256) + new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list)); + else if (data_len <= HA_PP2_TLV_VALUE_128) + new_tlv = pool_alloc(pool_head_pp_tlv_128); + else + new_tlv = pool_alloc(pool_head_pp_tlv_256); - LIST_APPEND(&conn->tlv_list, &new_tlv->list); - } + if (unlikely(!new_tlv)) + goto fail; + new_tlv->type = tlv_packet->type; - /* Verify that the PROXYv2 header ends at a TLV boundary. - * This is can not be true, because the TLV parsing already - * verifies that a TLV does not exceed the total length and - * also that there is space for a TLV header. - */ - BUG_ON(tlv_offset != total_v2_len); + /* Save TLV to make it accessible via sample fetch */ + memcpy(new_tlv->value, tlv.ptr, data_len); + new_tlv->len = data_len; - /* unsupported protocol, keep local connection address */ - break; - case 0x00: /* LOCAL command */ - /* keep local connection address for LOCAL */ - break; - default: - goto bad_header; /* not a supported command */ + LIST_APPEND(&conn->tlv_list, &new_tlv->list); } + /* Verify that the PROXYv2 header ends at a TLV boundary. 
+ * This is can not be true, because the TLV parsing already + * verifies that a TLV does not exceed the total length and + * also that there is space for a TLV header. + */ + BUG_ON(tlv_offset != total_v2_len); + trash.data = total_v2_len; goto eat_header; @@ -1305,10 +1321,11 @@ int conn_send_proxy(struct connection *conn, unsigned int flag) */ if (sc && sc_strm(sc)) { + struct stream *strm = __sc_strm(sc); ret = make_proxy_line(trash.area, trash.size, objt_server(conn->target), sc_conn(sc_opposite(sc)), - __sc_strm(sc)); + strm, strm_sess(strm)); } else { /* The target server expects a LOCAL line to be sent first. Retrieving @@ -1319,7 +1336,7 @@ int conn_send_proxy(struct connection *conn, unsigned int flag) ret = make_proxy_line(trash.area, trash.size, objt_server(conn->target), conn, - NULL); + NULL, conn->owner); } if (!ret) @@ -1925,7 +1942,7 @@ static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const } /* Note: <remote> is explicitly allowed to be NULL */ -static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm) +static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm, struct session *sess) { const char pp2_signature[] = PP2_SIGNATURE; void *tlv_crc32c_p = NULL; @@ -2006,7 +2023,7 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct } } - if (strm) { + if (sess) { struct buffer *replace = NULL; list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) { @@ -2015,12 +2032,12 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct /* Users will always need to provide a value, in case of forwarding, they should use fc_pp_tlv. * for generic types. Otherwise, we will send an empty TLV. 
*/ - if (!LIST_ISEMPTY(&srv_tlv->fmt)) { + if (!lf_expr_isempty(&srv_tlv->fmt)) { replace = alloc_trash_chunk(); if (unlikely(!replace)) return 0; - replace->data = build_logline(strm, replace->area, replace->size, &srv_tlv->fmt); + replace->data = sess_build_logline(sess, strm, replace->area, replace->size, &srv_tlv->fmt); if (unlikely((buf_len - ret) < sizeof(struct tlv))) { free_trash_chunk(replace); @@ -2163,12 +2180,12 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct } /* Note: <remote> is explicitly allowed to be NULL */ -int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm) +int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm, struct session *sess) { int ret = 0; if (srv && (srv->pp_opts & SRV_PP_V2)) { - ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm); + ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm, sess); } else { const struct sockaddr_storage *src = NULL; @@ -2514,6 +2531,59 @@ int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char return 1; } + +/* fetch the current number of streams opened for a connection */ +int smp_fetch_fc_nb_streams(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct connection *conn; + unsigned int nb_strm; + + conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) : smp->strm ? 
sc_conn(smp->strm->scb) : NULL; + + if (!conn) + return 0; + + if (!conn->mux || !conn->mux->ctl) { + if (!conn->mux) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + + nb_strm = conn->mux->ctl(conn, MUX_CTL_GET_NBSTRM, NULL); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = nb_strm; + + return 1; +} + +/* fetch the maximum number of streams supported by a connection */ +int smp_fetch_fc_streams_limit(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct connection *conn; + unsigned int strm_limit; + + conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) : smp->strm ? sc_conn(smp->strm->scb) : NULL; + + if (!conn) + return 0; + + if (!conn->mux || !conn->mux->ctl) { + if (!conn->mux) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + + strm_limit = conn->mux->ctl(conn, MUX_CTL_GET_MAXSTRM, NULL); + + smp->flags = 0; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = strm_limit; + + return 1; +} + /* Note: must not be declared <const> as its list will be overwritten. * Note: fetches that may return multiple types should be declared using the * appropriate pseudo-type. 
If not available it must be declared as the lowest @@ -2524,14 +2594,18 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV }, { "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, { "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, + { "bc_nb_streams", smp_fetch_fc_nb_streams, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, + { "bc_setting_streams_limit", smp_fetch_fc_streams_limit, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, { "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI }, + { "fc_nb_streams", smp_fetch_fc_nb_streams, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, { "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, - { "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L4CLI }, + { "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L5CLI }, + { "fc_settings_streams_limit", smp_fetch_fc_streams_limit, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, { /* END */ }, }}; @@ -2544,9 +2618,38 @@ static struct cfg_kw_list cfg_kws = {ILH, { INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +/* Generate the hash of a connection with params as input + * Each non-null field of params is taken into account for the hash calcul. + */ +uint64_t conn_hash_prehash(const char *buf, size_t size) +{ + return XXH64(buf, size, 0); +} + +/* Computes <data> hash into <hash>. In the same time, <flags> + * are updated with <type> for the hash header. 
+ */ +static void conn_hash_update(XXH64_state_t *hash, + const void *data, size_t size, + enum conn_hash_params_t *flags, + enum conn_hash_params_t type) +{ + XXH64_update(hash, data, size); + *flags |= type; +} + +static uint64_t conn_hash_digest(XXH64_state_t *hash, + enum conn_hash_params_t flags) +{ + const uint64_t flags_u64 = (uint64_t)flags; + const uint64_t f_hash = XXH64_digest(hash); + + return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(f_hash); +} + /* private function to handle sockaddr as input for connection hash */ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, - char *buf, size_t *idx, + XXH64_state_t *hash, enum conn_hash_params_t *hash_flags, enum conn_hash_params_t param_type_addr, enum conn_hash_params_t param_type_port) @@ -2558,12 +2661,12 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, case AF_INET: addr = (struct sockaddr_in *)ss; - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr->sin_addr, sizeof(addr->sin_addr), hash_flags, param_type_addr); if (addr->sin_port) { - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr->sin_port, sizeof(addr->sin_port), hash_flags, param_type_port); } @@ -2573,12 +2676,12 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, case AF_INET6: addr6 = (struct sockaddr_in6 *)ss; - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr6->sin6_addr, sizeof(addr6->sin6_addr), hash_flags, param_type_addr); if (addr6->sin6_port) { - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr6->sin6_port, sizeof(addr6->sin6_port), hash_flags, param_type_port); } @@ -2587,76 +2690,48 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, } } -/* Generate the hash of a connection with params as input - * Each non-null field of params is taken into account for the hash calcul. 
- */ -uint64_t conn_hash_prehash(char *buf, size_t size) -{ - return XXH64(buf, size, 0); -} - -/* Append <data> into <buf> at <idx> offset in preparation for connection hash - * calcul. <idx> is incremented beyond data <size>. In the same time, <flags> - * are updated with <type> for the hash header. - */ -void conn_hash_update(char *buf, size_t *idx, - const void *data, size_t size, - enum conn_hash_params_t *flags, - enum conn_hash_params_t type) -{ - memcpy(&buf[*idx], data, size); - *idx += size; - *flags |= type; -} - -uint64_t conn_hash_digest(char *buf, size_t bufsize, - enum conn_hash_params_t flags) -{ - const uint64_t flags_u64 = (uint64_t)flags; - const uint64_t hash = XXH64(buf, bufsize, 0); - - return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(hash); -} - uint64_t conn_calculate_hash(const struct conn_hash_params *params) { - char *buf; - size_t idx = 0; - uint64_t hash = 0; enum conn_hash_params_t hash_flags = 0; + XXH64_state_t hash; - buf = trash.area; + XXH64_reset(&hash, 0); - conn_hash_update(buf, &idx, &params->target, sizeof(params->target), &hash_flags, 0); + conn_hash_update(&hash, &params->target, sizeof(params->target), &hash_flags, 0); - if (params->sni_prehash) { - conn_hash_update(buf, &idx, - &params->sni_prehash, sizeof(params->sni_prehash), - &hash_flags, CONN_HASH_PARAMS_TYPE_SNI); + if (params->name_prehash) { + conn_hash_update(&hash, + &params->name_prehash, sizeof(params->name_prehash), + &hash_flags, CONN_HASH_PARAMS_TYPE_NAME); } if (params->dst_addr) { conn_calculate_hash_sockaddr(params->dst_addr, - buf, &idx, &hash_flags, + &hash, &hash_flags, CONN_HASH_PARAMS_TYPE_DST_ADDR, CONN_HASH_PARAMS_TYPE_DST_PORT); } if (params->src_addr) { conn_calculate_hash_sockaddr(params->src_addr, - buf, &idx, &hash_flags, + &hash, &hash_flags, CONN_HASH_PARAMS_TYPE_SRC_ADDR, CONN_HASH_PARAMS_TYPE_SRC_PORT); } if (params->proxy_prehash) { - conn_hash_update(buf, &idx, + conn_hash_update(&hash, &params->proxy_prehash, 
sizeof(params->proxy_prehash), &hash_flags, CONN_HASH_PARAMS_TYPE_PROXY); } - hash = conn_hash_digest(buf, idx, hash_flags); - return hash; + if (params->mark_tos_prehash) { + conn_hash_update(&hash, + &params->mark_tos_prehash, sizeof(params->mark_tos_prehash), + &hash_flags, CONN_HASH_PARAMS_TYPE_MARK_TOS); + } + + return conn_hash_digest(&hash, hash_flags); } /* Reverse a <conn> connection instance. This effectively moves the connection @@ -2695,7 +2770,7 @@ int conn_reverse(struct connection *conn) /* data cannot wrap else prehash usage is incorrect */ BUG_ON(b_data(&conn->reverse.name) != b_contig_data(&conn->reverse.name, 0)); - hash_params.sni_prehash = + hash_params.name_prehash = conn_hash_prehash(b_head(&conn->reverse.name), b_data(&conn->reverse.name)); } @@ -2722,7 +2797,10 @@ int conn_reverse(struct connection *conn) conn->target = &l->obj_type; conn->flags |= CO_FL_ACT_REVERSING; - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_RES); + + /* Initialize session origin after reversal. Mandatory for several fetches. */ + sess->origin = &conn->obj_type; } /* Invert source and destination addresses if already set. 
*/ diff --git a/src/cpuset.c b/src/cpuset.c index 82e350f..a20b81a 100644 --- a/src/cpuset.c +++ b/src/cpuset.c @@ -280,7 +280,7 @@ int cpu_map_configured(void) static int cpuset_alloc(void) { /* allocate the structures used to store CPU topology info */ - cpu_map = (struct cpu_map*)calloc(MAX_TGROUPS, sizeof(*cpu_map)); + cpu_map = calloc(MAX_TGROUPS, sizeof(*cpu_map)); if (!cpu_map) return 0; diff --git a/src/debug.c b/src/debug.c index 756c194..5f21f02 100644 --- a/src/debug.c +++ b/src/debug.c @@ -46,6 +46,7 @@ #include <haproxy/thread.h> #include <haproxy/time.h> #include <haproxy/tools.h> +#include <haproxy/trace.h> #include <import/ist.h> @@ -112,7 +113,7 @@ struct post_mortem { uid_t boot_uid; gid_t boot_gid; struct rlimit limit_fd; // RLIMIT_NOFILE - struct rlimit limit_ram; // RLIMIT_AS or RLIMIT_DATA + struct rlimit limit_ram; // RLIMIT_DATA #if defined(USE_THREAD) struct { @@ -456,13 +457,8 @@ void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx) */ static int cli_io_handler_show_threads(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); int thr; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - if (appctx->st0) thr = appctx->st1; else @@ -666,7 +662,7 @@ int debug_parse_cli_bug(char **args, char *payload, struct appctx *appctx, void return 1; _HA_ATOMIC_INC(&debug_commands_issued); - BUG_ON(one > zero); + BUG_ON(one > zero, "This was triggered on purpose from the CLI 'debug dev bug' command."); return 1; } @@ -679,7 +675,7 @@ int debug_parse_cli_warn(char **args, char *payload, struct appctx *appctx, void return 1; _HA_ATOMIC_INC(&debug_commands_issued); - WARN_ON(one > zero); + WARN_ON(one > zero, "This was triggered on purpose from the CLI 'debug dev warn' command."); return 1; } @@ -692,7 +688,7 @@ int debug_parse_cli_check(char **args, char *payload, struct appctx *appctx, voi return 1; _HA_ATOMIC_INC(&debug_commands_issued); - CHECK_IF(one > 
zero); + CHECK_IF(one > zero, "This was triggered on purpose from the CLI 'debug dev check' command."); return 1; } @@ -1504,6 +1500,112 @@ static int debug_parse_cli_sched(char **args, char *payload, struct appctx *appc return cli_err(appctx, "Not enough memory"); } +#if defined(DEBUG_DEV) +/* All of this is for "trace dbg" */ + +static struct trace_source trace_dbg __read_mostly = { + .name = IST("dbg"), + .desc = "trace debugger", + .report_events = ~0, // report everything by default +}; + +#define TRACE_SOURCE &trace_dbg +INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE); + +/* This is the task handler used to send traces in loops. Note that the task's + * context contains the number of remaining calls to be done. The task sends 20 + * messages per wakeup. + */ +static struct task *debug_trace_task(struct task *t, void *ctx, unsigned int state) +{ + ulong count; + + /* send 2 traces enter/leave +18 devel = 20 traces total */ + TRACE_ENTER(1); + TRACE_DEVEL("msg01 has 20 bytes .", 1); + TRACE_DEVEL("msg02 has 20 bytes .", 1); + TRACE_DEVEL("msg03 has 20 bytes .", 1); + TRACE_DEVEL("msg04 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg05 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg06 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg07 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg08 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg09 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg10 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + 
TRACE_DEVEL("msg11 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg12 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg13 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg14 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg15 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg16 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + TRACE_DEVEL("msg17 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + TRACE_DEVEL("msg18 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + 
TRACE_LEAVE(1); + + count = (ulong)t->context; + t->context = (void*)count - 1; + + if (count) + task_wakeup(t, TASK_WOKEN_MSG); + else { + task_destroy(t); + t = NULL; + } + return t; +} + +/* parse a "debug dev trace" command + * debug dev trace <nbthr>. + * It will create as many tasks (one per thread), starting from lowest threads. + * The traces will stop after 1M wakeups or 20M messages ~= 4GB of data. + */ +static int debug_parse_cli_trace(char **args, char *payload, struct appctx *appctx, void *private) +{ + unsigned long count = 1; + unsigned long i; + char *msg = NULL; + char *endarg; + + if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) + return 1; + + _HA_ATOMIC_INC(&debug_commands_issued); + + if (!args[3][0]) { + memprintf(&msg, "Need a thread count. Note that 20M msg will be sent per thread.\n"); + goto fail; + } + + /* parse the new value . */ + count = strtoll(args[3], &endarg, 0); + if (args[3][1] && *endarg) { + memprintf(&msg, "Ignoring unparsable thread number '%s'.\n", args[3]); + goto fail; + } + + if (count >= global.nbthread) + count = global.nbthread; + + for (i = 0; i < count; i++) { + struct task *task = task_new_on(i); + + if (!task) + goto fail; + + task->process = debug_trace_task; + task->context = (void*)(ulong)1000000; // 1M wakeups = 20M messages + task_wakeup(task, TASK_WOKEN_INIT); + } + + if (msg && *msg) + return cli_dynmsg(appctx, LOG_INFO, msg); + return 1; + + fail: + return cli_dynmsg(appctx, LOG_ERR, msg); +} +#endif /* DEBUG_DEV */ + /* CLI state for "debug dev fd" */ struct dev_fd_ctx { int start_fd; @@ -1531,7 +1633,6 @@ static int debug_parse_cli_fd(char **args, char *payload, struct appctx *appctx, static int debug_iohandler_fd(struct appctx *appctx) { struct dev_fd_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct sockaddr_storage sa; struct stat statbuf; socklen_t salen, vlen; @@ -1540,10 +1641,6 @@ static int debug_iohandler_fd(struct appctx *appctx) int ret = 1; int i, fd; - /* FIXME: Don't 
watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - chunk_reset(&trash); thread_isolate(); @@ -1693,7 +1790,6 @@ static int debug_iohandler_fd(struct appctx *appctx) } thread_release(); - end: return ret; } @@ -1763,15 +1859,10 @@ static int debug_parse_cli_memstats(char **args, char *payload, struct appctx *a static int debug_iohandler_memstats(struct appctx *appctx) { struct dev_mem_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct mem_stats *ptr; const char *pfx = ctx->match; int ret = 1; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - if (!ctx->width) { /* we don't know the first column's width, let's compute it * now based on a first pass on printable entries and their @@ -2182,11 +2273,7 @@ static int feed_post_mortem() post_mortem.process.boot_gid = getegid(); getrlimit(RLIMIT_NOFILE, &post_mortem.process.limit_fd); -#if defined(RLIMIT_AS) - getrlimit(RLIMIT_AS, &post_mortem.process.limit_ram); -#elif defined(RLIMIT_DATA) getrlimit(RLIMIT_DATA, &post_mortem.process.limit_ram); -#endif if (strcmp(post_mortem.platform.utsname.sysname, "Linux") == 0) feed_post_mortem_linux(); @@ -2295,6 +2382,9 @@ static struct cli_kw_list cli_kws = {{ },{ {{ "debug", "dev", "sym", NULL }, "debug dev sym <addr> : resolve symbol address", debug_parse_cli_sym, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "task", NULL }, "debug dev task <ptr> [wake|expire|kill] : show/wake/expire/kill task/tasklet", debug_parse_cli_task, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "tkill", NULL }, "debug dev tkill [thr] [sig] : send signal to thread", debug_parse_cli_tkill, NULL, NULL, NULL, ACCESS_EXPERT }, +#if defined(DEBUG_DEV) + {{ "debug", "dev", "trace", NULL }, "debug dev trace [nbthr] : flood traces from that many threads", debug_parse_cli_trace, NULL, NULL, NULL, ACCESS_EXPERT }, +#endif {{ "debug", "dev", "warn", NULL }, "debug dev 
warn : call WARN_ON() and possibly crash", debug_parse_cli_warn, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "write", NULL }, "debug dev write [size] : write that many bytes in return", debug_parse_cli_write, NULL, NULL, NULL, ACCESS_EXPERT }, @@ -27,10 +27,10 @@ #include <haproxy/cli.h> #include <haproxy/dgram.h> #include <haproxy/dns.h> +#include <haproxy/dns_ring.h> #include <haproxy/errors.h> #include <haproxy/fd.h> #include <haproxy/log.h> -#include <haproxy/ring.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> @@ -108,7 +108,7 @@ int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len) struct ist myist; myist = ist2(buf, len); - ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + ret = dns_ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); if (!ret) { ns->counters->snd_error++; HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock); @@ -131,7 +131,7 @@ int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len) struct ist myist; myist = ist2(buf, len); - ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + ret = dns_ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); if (!ret) { ns->counters->snd_error++; return -1; @@ -290,7 +290,7 @@ static void dns_resolve_send(struct dgram_conn *dgram) { int fd; struct dns_nameserver *ns; - struct ring *ring; + struct dns_ring *ring; struct buffer *buf; uint64_t msg_len; size_t len, cnt, ofs; @@ -407,21 +407,21 @@ int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk) ns->dgram = dgram; dgram->ofs_req = ~0; /* init ring offset */ - dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); + dgram->ring_req = dns_ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); if (!dgram->ring_req) { ha_alert("memory allocation error initializing the ring for nameserver.\n"); goto out; } /* attach the task as reader */ - if (!ring_attach(dgram->ring_req)) { + 
if (!dns_ring_attach(dgram->ring_req)) { /* mark server attached to the ring */ ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n"); goto out; } return 0; out: - ring_free(dgram->ring_req); + dns_ring_free(dgram->ring_req); free(dgram); @@ -436,14 +436,14 @@ static void dns_session_io_handler(struct appctx *appctx) { struct stconn *sc = appctx_sc(appctx); struct dns_session *ds = appctx->svcctx; - struct ring *ring = &ds->ring; + struct dns_ring *ring = &ds->ring; struct buffer *buf = &ring->buf; uint64_t msg_len; int available_room; size_t len, cnt, ofs; int ret = 0; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -471,7 +471,7 @@ static void dns_session_io_handler(struct appctx *appctx) } HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); + MT_LIST_DELETE(&appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock); HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock); @@ -633,8 +633,8 @@ static void dns_session_io_handler(struct appctx *appctx) if (ret) { /* let's be woken up once new request to write arrived */ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock); - BUG_ON(LIST_INLIST(&appctx->wait_entry)); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); + BUG_ON(MT_LIST_INLIST(&appctx->wait_entry)); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock); applet_have_no_more_data(appctx); } @@ -797,7 +797,7 @@ void dns_session_free(struct dns_session *ds) BUG_ON(!LIST_ISEMPTY(&ds->list)); BUG_ON(!LIST_ISEMPTY(&ds->waiter)); BUG_ON(!LIST_ISEMPTY(&ds->queries)); - BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters)); + BUG_ON(!MT_LIST_ISEMPTY(&ds->ring.waiters)); BUG_ON(!eb_is_empty(&ds->query_ids)); pool_free(dns_session_pool, ds); } @@ -844,12 +844,12 @@ static void dns_session_release(struct appctx *appctx) 
if (!ds) return; - /* We do not call ring_appctx_detach here + /* We do not call dns_ring_appctx_detach here * because we want to keep readers counters * to retry a conn with a different appctx. */ HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock); - LIST_DEL_INIT(&appctx->wait_entry); + MT_LIST_DELETE(&appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock); dss = ds->dss; @@ -1058,9 +1058,9 @@ struct dns_session *dns_session_new(struct dns_stream_server *dss) if (!ds->tx_ring_area) goto error; - ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE); + dns_ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE); /* never fail because it is the first watcher attached to the ring */ - DISGUISE(ring_attach(&ds->ring)); + DISGUISE(dns_ring_attach(&ds->ring)); if ((ds->task_exp = task_new_here()) == NULL) goto error; @@ -1095,7 +1095,7 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int { struct dns_nameserver *ns = (struct dns_nameserver *)context; struct dns_stream_server *dss = ns->stream; - struct ring *ring = dss->ring_req; + struct dns_ring *ring = dss->ring_req; struct buffer *buf = &ring->buf; uint64_t msg_len; size_t len, cnt, ofs; @@ -1151,7 +1151,7 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int if (!LIST_ISEMPTY(&dss->free_sess)) { ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list); - if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) { + if (dns_ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) { ds->nb_queries++; if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ) LIST_DEL_INIT(&ds->list); @@ -1171,8 +1171,8 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int if (!LIST_ISEMPTY(&dss->idle_sess)) { ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list); - /* ring is empty so this ring_write should never fail */ - ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + /* 
ring is empty so this dns_ring_write should never fail */ + dns_ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); ds->nb_queries++; LIST_DEL_INIT(&ds->list); @@ -1196,8 +1196,8 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int /* allocate a new session */ ads = dns_session_new(dss); if (ads) { - /* ring is empty so this ring_write should never fail */ - ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + /* ring is empty so this dns_ring_write should never fail */ + dns_ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); ads->nb_queries++; LIST_INSERT(&dss->free_sess, &ads->list); } @@ -1248,7 +1248,7 @@ int dns_stream_init(struct dns_nameserver *ns, struct server *srv) dss->maxconn = srv->maxconn; dss->ofs_req = ~0; /* init ring offset */ - dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); + dss->ring_req = dns_ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); if (!dss->ring_req) { ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id); goto out; @@ -1264,7 +1264,7 @@ int dns_stream_init(struct dns_nameserver *ns, struct server *srv) dss->task_req->context = ns; /* attach the task as reader */ - if (!ring_attach(dss->ring_req)) { + if (!dns_ring_attach(dss->ring_req)) { /* mark server attached to the ring */ ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id); goto out; @@ -1306,7 +1306,7 @@ out: if (dss && dss->task_req) task_destroy(dss->task_req); if (dss && dss->ring_req) - ring_free(dss->ring_req); + dns_ring_free(dss->ring_req); free(dss); return -1; diff --git a/src/dns_ring.c b/src/dns_ring.c new file mode 100644 index 0000000..01ce593 --- /dev/null +++ b/src/dns_ring.c @@ -0,0 +1,225 @@ +/* + * Ring buffer management + * This is a fork of ring.c for DNS usage. 
+ * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdlib.h> +#include <haproxy/api.h> +#include <haproxy/applet.h> +#include <haproxy/buf.h> +#include <haproxy/cli.h> +#include <haproxy/dns_ring.h> +#include <haproxy/sc_strm.h> +#include <haproxy/stconn.h> +#include <haproxy/thread.h> + +/* Initialize a pre-allocated ring with the buffer area + * of size */ +void dns_ring_init(struct dns_ring *ring, void *area, size_t size) +{ + HA_RWLOCK_INIT(&ring->lock); + MT_LIST_INIT(&ring->waiters); + ring->readers_count = 0; + ring->buf = b_make(area, size, 0, 0); + /* write the initial RC byte */ + b_putchr(&ring->buf, 0); +} + +/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on + * allocation failure. 
+ */ +struct dns_ring *dns_ring_new(size_t size) +{ + struct dns_ring *ring = NULL; + void *area = NULL; + + if (size < 2) + goto fail; + + ring = malloc(sizeof(*ring)); + if (!ring) + goto fail; + + area = malloc(size); + if (!area) + goto fail; + + dns_ring_init(ring, area, size); + return ring; + fail: + free(area); + free(ring); + return NULL; +} + +/* destroys and frees ring <ring> */ +void dns_ring_free(struct dns_ring *ring) +{ + if (!ring) + return; + + free(ring->buf.area); + free(ring); +} + +/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg> + * to ring <ring>. The message is sent atomically. It may be truncated to + * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the + * two lists, it's just a convenience to help the caller prepend some prefixes + * when necessary. It takes the ring's write lock to make sure no other thread + * will touch the buffer during the update. Returns the number of bytes sent, + * or <=0 on failure. + */ +ssize_t dns_ring_write(struct dns_ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg) +{ + struct buffer *buf = &ring->buf; + struct appctx *appctx; + size_t totlen = 0; + size_t lenlen; + uint64_t dellen; + int dellenlen; + struct mt_list *elt1, elt2; + ssize_t sent = 0; + int i; + + /* we have to find some room to add our message (the buffer is + * never empty and at least contains the previous counter) and + * to update both the buffer contents and heads at the same + * time (it's doable using atomic ops but not worth the + * trouble, let's just lock). For this we first need to know + * the total message's length. We cannot measure it while + * copying due to the varint encoding of the length. 
+ */ + for (i = 0; i < npfx; i++) + totlen += pfx[i].len; + for (i = 0; i < nmsg; i++) + totlen += msg[i].len; + + if (totlen > maxlen) + totlen = maxlen; + + lenlen = varint_bytes(totlen); + + HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + if (lenlen + totlen + 1 + 1 > b_size(buf)) + goto done_buf; + + while (b_room(buf) < lenlen + totlen + 1) { + /* we need to delete the oldest message (from the end), + * and we have to stop if there's a reader stuck there. + * Unless there's corruption in the buffer it's guaranteed + * that we have enough data to find 1 counter byte, a + * varint-encoded length (1 byte min) and the message + * payload (0 bytes min). + */ + if (*b_head(buf)) + goto done_buf; + dellenlen = b_peek_varint(buf, 1, &dellen); + if (!dellenlen) + goto done_buf; + BUG_ON(b_data(buf) < 1 + dellenlen + dellen); + + b_del(buf, 1 + dellenlen + dellen); + } + + /* OK now we do have room */ + __b_put_varint(buf, totlen); + + totlen = 0; + for (i = 0; i < npfx; i++) { + size_t len = pfx[i].len; + + if (len + totlen > maxlen) + len = maxlen - totlen; + if (len) + __b_putblk(buf, pfx[i].ptr, len); + totlen += len; + } + + for (i = 0; i < nmsg; i++) { + size_t len = msg[i].len; + + if (len + totlen > maxlen) + len = maxlen - totlen; + if (len) + __b_putblk(buf, msg[i].ptr, len); + totlen += len; + } + + *b_tail(buf) = 0; buf->data++; // new read counter + sent = lenlen + totlen + 1; + + /* notify potential readers */ + mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + appctx_wakeup(appctx); + + done_buf: + HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + return sent; +} + +/* Tries to attach appctx <appctx> as a new reader on ring <ring>. This is + * meant to be used by low level appctx code such as CLI or ring forwarding. + * For higher level functions, please see the relevant parts in appctx or CLI. + * It returns non-zero on success or zero on failure if too many users are + * already attached. 
On success, the caller MUST call dns_ring_detach_appctx() + * to detach itself, even if it was never woken up. + */ +int dns_ring_attach(struct dns_ring *ring) +{ + int users = ring->readers_count; + + do { + if (users >= 255) + return 0; + } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1)); + return 1; +} + +/* detach an appctx from a ring. The appctx is expected to be waiting at offset + * <ofs> relative to the beginning of the storage, or ~0 if not waiting yet. + * Nothing is done if <ring> is NULL. + */ +void dns_ring_detach_appctx(struct dns_ring *ring, struct appctx *appctx, size_t ofs) +{ + if (!ring) + return; + + HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + if (ofs != ~0) { + /* reader was still attached */ + if (ofs < b_head_ofs(&ring->buf)) + ofs += b_size(&ring->buf) - b_head_ofs(&ring->buf); + else + ofs -= b_head_ofs(&ring->buf); + + BUG_ON(ofs >= b_size(&ring->buf)); + MT_LIST_DELETE(&appctx->wait_entry); + HA_ATOMIC_DEC(b_peek(&ring->buf, ofs)); + } + HA_ATOMIC_DEC(&ring->readers_count); + HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/dynbuf.c b/src/dynbuf.c index 712e334..aec9667 100644 --- a/src/dynbuf.c +++ b/src/dynbuf.c @@ -15,10 +15,12 @@ #include <string.h> #include <haproxy/api.h> +#include <haproxy/cfgparse.h> #include <haproxy/dynbuf.h> #include <haproxy/global.h> #include <haproxy/list.h> #include <haproxy/pool.h> +#include <haproxy/tools.h> struct pool_head *pool_head_buffer __read_mostly; @@ -28,13 +30,24 @@ int init_buffer() void *buffer; int thr; int done; + int i; pool_head_buffer = create_pool("buffer", global.tune.bufsize, MEM_F_SHARED|MEM_F_EXACT); if (!pool_head_buffer) return 0; - for (thr = 0; thr < MAX_THREADS; thr++) - LIST_INIT(&ha_thread_ctx[thr].buffer_wq); + /* make sure any change to the queues assignment isn't overlooked */ + BUG_ON(DB_PERMANENT - DB_UNLIKELY - 1 != DYNBUF_NBQ); + BUG_ON(DB_MUX_RX_Q < 
DB_SE_RX_Q || DB_MUX_RX_Q >= DYNBUF_NBQ); + BUG_ON(DB_SE_RX_Q < DB_CHANNEL_Q || DB_SE_RX_Q >= DYNBUF_NBQ); + BUG_ON(DB_CHANNEL_Q < DB_MUX_TX_Q || DB_CHANNEL_Q >= DYNBUF_NBQ); + BUG_ON(DB_MUX_TX_Q >= DYNBUF_NBQ); + + for (thr = 0; thr < MAX_THREADS; thr++) { + for (i = 0; i < DYNBUF_NBQ; i++) + LIST_INIT(&ha_thread_ctx[thr].buffer_wq[i]); + ha_thread_ctx[thr].bufq_map = 0; + } /* The reserved buffer is what we leave behind us. Thus we always need @@ -102,6 +115,7 @@ void buffer_dump(FILE *o, struct buffer *b, int from, int to) void __offer_buffers(void *from, unsigned int count) { struct buffer_wait *wait, *wait_back; + int q; /* For now, we consider that all objects need 1 buffer, so we can stop * waking up them once we have enough of them to eat all the available @@ -109,18 +123,117 @@ void __offer_buffers(void *from, unsigned int count) * other tasks, but that's a rough estimate. Similarly, for each cached * event we'll need 1 buffer. */ - list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq, list) { - if (!count) - break; - - if (wait->target == from || !wait->wakeup_cb(wait->target)) + for (q = 0; q < DYNBUF_NBQ; q++) { + if (!(th_ctx->bufq_map & (1 << q))) continue; + BUG_ON_HOT(LIST_ISEMPTY(&th_ctx->buffer_wq[q])); + + list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq[q], list) { + if (!count) + break; + + if (wait->target == from || !wait->wakeup_cb(wait->target)) + continue; + + LIST_DEL_INIT(&wait->list); + count--; + } + if (LIST_ISEMPTY(&th_ctx->buffer_wq[q])) + th_ctx->bufq_map &= ~(1 << q); + } +} + +/* config parser for global "tune.buffers.limit", accepts a number >= 0 */ +static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int limit; - LIST_DEL_INIT(&wait->list); - count--; + if (too_many_args(1, args, err, NULL)) + return -1; + + limit = atoi(args[1]); + if (limit < 0) { + memprintf(err, "'%s' expects a non-negative 
number but got '%s'.", args[0], args[1]); + return -1; + } + + global.tune.buf_limit = limit; + if (global.tune.buf_limit) { + if (global.tune.buf_limit < 3) + global.tune.buf_limit = 3; } + + return 0; } +/* config parser for global "tune.buffers.reserve", accepts a number >= 0 */ +static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int reserve; + + if (too_many_args(1, args, err, NULL)) + return -1; + + reserve = atoi(args[1]); + if (reserve < 0) { + memprintf(err, "'%s' expects a non-negative number but got '%s'.", args[0], args[1]); + return -1; + } + + global.tune.reserved_bufs = reserve; + return 0; +} + +/* allocate emergency buffers for the thread */ +static int alloc_emergency_buffers_per_thread(void) +{ + int idx; + + th_ctx->emergency_bufs_left = global.tune.reserved_bufs; + th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs)); + if (!th_ctx->emergency_bufs) + return 0; + + for (idx = 0; idx < global.tune.reserved_bufs; idx++) { + /* reserved bufs are not subject to the limit, so we must push it */ + if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit)) + _HA_ATOMIC_INC(&pool_head_buffer->limit); + th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); + if (!th_ctx->emergency_bufs[idx]) + return 0; + } + + return 1; +} + +/* frees the thread's emergency buffers */ +static void free_emergency_buffers_per_thread(void) +{ + int idx; + + if (th_ctx->emergency_bufs) { + for (idx = 0; idx < global.tune.reserved_bufs; idx++) + pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]); + } + + ha_free(&th_ctx->emergency_bufs); +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit }, + { CFG_GLOBAL, "tune.buffers.reserve", cfg_parse_tune_buffers_reserve }, + { 0, NULL, NULL } +}}; 
+ +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread); +REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread); + /* * Local variables: * c-indent-level: 8 diff --git a/src/errors.c b/src/errors.c index 7a2d14a..197a0cd 100644 --- a/src/errors.c +++ b/src/errors.c @@ -90,11 +90,7 @@ static struct ring *startup_logs_from_fd(int fd, int new) if (area == MAP_FAILED || area == NULL) goto error; - if (new) - r = ring_make_from_area(area, STARTUP_LOG_SIZE); - else - r = ring_cast_from_area(area); - + r = ring_make_from_area(area, STARTUP_LOG_SIZE, new); if (r == NULL) goto error; @@ -116,7 +112,7 @@ error: * Once in wait mode, the shm must be copied and closed. * */ -void startup_logs_init() +void startup_logs_init_shm() { struct ring *r = NULL; char *str_fd, *endptr; @@ -180,24 +176,29 @@ error: } -#else /* ! USE_SHM_OPEN */ +#endif /* ! USE_SHM_OPEN */ void startup_logs_init() { +#ifdef USE_SHM_OPEN + startup_logs_init_shm(); +#else /* ! USE_SHM_OPEN */ startup_logs = ring_new(STARTUP_LOG_SIZE); -} - #endif + if (startup_logs) + vma_set_name(ring_allocated_area(startup_logs), + ring_allocated_size(startup_logs), + "errors", "startup_logs"); +} /* free the startup logs, unmap if it was an shm */ void startup_logs_free(struct ring *r) { #ifdef USE_SHM_OPEN if (r == shm_startup_logs) - munmap(r, STARTUP_LOG_SIZE); - else + munmap(ring_allocated_area(r), STARTUP_LOG_SIZE); #endif /* ! 
USE_SHM_OPEN */ - ring_free(r); + ring_free(r); } /* duplicate a startup logs which was previously allocated in a shm */ @@ -206,12 +207,11 @@ struct ring *startup_logs_dup(struct ring *src) struct ring *dst = NULL; /* must use the size of the previous buffer */ - dst = ring_new(b_size(&src->buf)); + dst = ring_new(ring_allocated_size(src)); if (!dst) goto error; - b_reset(&dst->buf); - b_ncat(&dst->buf, &src->buf, b_data(&src->buf)); + ring_dup(dst, src, ring_size(src)); error: return dst; } diff --git a/src/ev_epoll.c b/src/ev_epoll.c index c42cf2e..352620d 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -275,6 +275,8 @@ static int init_epoll_per_thread() epoll_events = calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents); if (epoll_events == NULL) goto fail_alloc; + vma_set_name_id(epoll_events, sizeof(struct epoll_event) * global.tune.maxpollevents, + "ev_epoll", "epoll_events", tid + 1); if (MAX_THREADS > 1 && tid) { epoll_fd[tid] = epoll_create(global.maxsock + 1); diff --git a/src/ev_evports.c b/src/ev_evports.c index 07676e6..ee357bc 100644 --- a/src/ev_evports.c +++ b/src/ev_evports.c @@ -185,6 +185,14 @@ static void _do_poll(struct poller *p, int exp, int wake) do { int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time; int interrupted = 0; + /* Note: normally we should probably expect to pass + * global.tune.maxpollevents here so as to process multiple + * events at once, but it appears unreliable in tests, even + * starting with value 2, and it seems basically nobody's + * using that anymore so it's probably not worth spending days + * investigating this poller more to improve its performance, + * let's switch back to 1. 
--WT + */ nevlist = 1; /* desired number of events to be retrieved */ timeout_ts.tv_sec = (timeout / 1000); timeout_ts.tv_nsec = (timeout % 1000) * 1000000; @@ -194,6 +202,12 @@ static void _do_poll(struct poller *p, int exp, int wake) evports_evlist_max, &nevlist, /* updated to the number of events retrieved */ &timeout_ts); + + /* Be careful, nevlist here is always updated by the syscall + * even on status == -1, so it must always be respected + * otherwise events are lost. Awkward API BTW, I wonder how + * they thought ENOSYS ought to be handled... -WT + */ if (status != 0) { int e = errno; switch (e) { @@ -206,7 +220,7 @@ static void _do_poll(struct poller *p, int exp, int wake) /* nevlist >= 0 */ break; default: - nevlist = 0; + /* signal or anything else */ interrupted = 1; break; } diff --git a/src/ev_poll.c b/src/ev_poll.c index e98630c..8051420 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -25,6 +25,7 @@ #include <haproxy/signal.h> #include <haproxy/task.h> #include <haproxy/ticks.h> +#include <haproxy/tools.h> #ifndef POLLRDHUP @@ -249,6 +250,8 @@ static int init_poll_per_thread() poll_events = calloc(1, sizeof(struct pollfd) * global.maxsock); if (poll_events == NULL) return 0; + vma_set_name_id(poll_events, sizeof(struct pollfd) * global.maxsock, + "ev_poll", "poll_events", tid + 1); return 1; } @@ -279,8 +282,10 @@ static int _do_init(struct poller *p) if ((fd_evts[DIR_RD] = calloc(1, fd_evts_bytes)) == NULL) goto fail_srevt; + vma_set_name(fd_evts[DIR_RD], fd_evts_bytes, "ev_poll", "fd_evts_rd"); if ((fd_evts[DIR_WR] = calloc(1, fd_evts_bytes)) == NULL) goto fail_swevt; + vma_set_name(fd_evts[DIR_WR], fd_evts_bytes, "ev_poll", "fd_evts_wr"); hap_register_per_thread_init(init_poll_per_thread); hap_register_per_thread_deinit(deinit_poll_per_thread); diff --git a/src/ev_select.c b/src/ev_select.c index eadd588..9588e8a 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -21,6 +21,7 @@ #include <haproxy/global.h> #include <haproxy/task.h> #include 
<haproxy/ticks.h> +#include <haproxy/tools.h> /* private data */ @@ -223,9 +224,11 @@ static int init_select_per_thread() tmp_evts[DIR_RD] = calloc(1, fd_set_bytes); if (tmp_evts[DIR_RD] == NULL) goto fail; + vma_set_name_id(tmp_evts[DIR_RD], fd_set_bytes, "ev_select", "tmp_evts_rd", tid + 1); tmp_evts[DIR_WR] = calloc(1, fd_set_bytes); if (tmp_evts[DIR_WR] == NULL) goto fail; + vma_set_name_id(tmp_evts[DIR_WR], fd_set_bytes, "ev_select", "tmp_evts_wr", tid + 1); return 1; fail: free(tmp_evts[DIR_RD]); @@ -263,8 +266,10 @@ static int _do_init(struct poller *p) if ((fd_evts[DIR_RD] = calloc(1, fd_set_bytes)) == NULL) goto fail_srevt; + vma_set_name(fd_evts[DIR_RD], fd_set_bytes, "ev_select", "fd_evts_rd"); if ((fd_evts[DIR_WR] = calloc(1, fd_set_bytes)) == NULL) goto fail_swevt; + vma_set_name(fd_evts[DIR_WR], fd_set_bytes, "ev_select", "fd_evts_wr"); hap_register_per_thread_init(init_select_per_thread); hap_register_per_thread_deinit(deinit_select_per_thread); diff --git a/src/event_hdl.c b/src/event_hdl.c index f5bb5b6..f4f7b19 100644 --- a/src/event_hdl.c +++ b/src/event_hdl.c @@ -138,7 +138,7 @@ struct event_hdl_sub_type event_hdl_string_to_sub_type(const char *name) int it; for (it = 0; it < (int)(sizeof(event_hdl_sub_type_map) / sizeof(event_hdl_sub_type_map[0])); it++) { - if (!strcmp(name, event_hdl_sub_type_map[it].name)) + if (strcmp(name, event_hdl_sub_type_map[it].name) == 0) return event_hdl_sub_type_map[it].type; } return EVENT_HDL_SUB_NONE; diff --git a/src/fcgi-app.c b/src/fcgi-app.c index 00562f8..e8117a3 100644 --- a/src/fcgi-app.c +++ b/src/fcgi-app.c @@ -134,16 +134,7 @@ static void fcgi_release_rule(struct fcgi_rule *rule) if (!rule) return; - if (!LIST_ISEMPTY(&rule->value)) { - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, &rule->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + lf_expr_deinit(&rule->value); /* ->cond and ->name are not owned by the rule */ 
free(rule); } @@ -256,7 +247,7 @@ static int fcgi_flt_check(struct proxy *px, struct flt_conf *fconf) rule->type = crule->type; rule->name = ist(crule->name); rule->cond = crule->cond; - LIST_INIT(&rule->value); + lf_expr_init(&rule->value); if (crule->value) { if (!parse_logformat_string(crule->value, px, &rule->value, LOG_OPT_HTTP, @@ -84,8 +84,8 @@ #if defined(USE_POLL) #include <poll.h> -#include <errno.h> #endif +#include <errno.h> #include <haproxy/api.h> #include <haproxy/activity.h> @@ -981,8 +981,8 @@ void my_closefrom(int start) break; } while (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ENOMEM); - if (ret) - ret = fd - start; + /* always check the whole range */ + ret = fd - start; for (idx = 0; idx < ret; idx++) { if (poll_events[idx].revents & POLLNVAL) @@ -1108,6 +1108,7 @@ void poller_pipe_io_handler(int fd) static int alloc_pollers_per_thread() { fd_updt = calloc(global.maxsock, sizeof(*fd_updt)); + vma_set_name_id(fd_updt, global.maxsock * sizeof(*fd_updt), "fd", "fd_updt", tid + 1); return fd_updt != NULL; } @@ -1158,10 +1159,11 @@ int init_pollers() int p; struct poller *bp; - if ((fdtab_addr = calloc(global.maxsock, sizeof(*fdtab) + 64)) == NULL) { + if ((fdtab_addr = calloc(1, global.maxsock * sizeof(*fdtab) + 64)) == NULL) { ha_alert("Not enough memory to allocate %d entries for fdtab!\n", global.maxsock); goto fail_tab; } + vma_set_name(fdtab_addr, global.maxsock * sizeof(*fdtab) + 64, "fd", "fdtab_addr"); /* always provide an aligned fdtab */ fdtab = (struct fdtab*)((((size_t)fdtab_addr) + 63) & -(size_t)64); @@ -1170,11 +1172,13 @@ int init_pollers() ha_alert("Not enough memory to allocate %d entries for polled_mask!\n", global.maxsock); goto fail_polledmask; } + vma_set_name(polled_mask, global.maxsock * sizeof(*polled_mask), "fd", "polled_mask"); if ((fdinfo = calloc(global.maxsock, sizeof(*fdinfo))) == NULL) { ha_alert("Not enough memory to allocate %d entries for fdinfo!\n", global.maxsock); goto fail_info; } + 
vma_set_name(fdinfo, global.maxsock * sizeof(*fdinfo), "fd", "fdinfo"); for (p = 0; p < MAX_TGROUPS; p++) update_list[p].first = update_list[p].last = -1; diff --git a/src/flt_bwlim.c b/src/flt_bwlim.c index 66c2883..c5078c8 100644 --- a/src/flt_bwlim.c +++ b/src/flt_bwlim.c @@ -219,26 +219,26 @@ static int bwlim_check(struct proxy *px, struct flt_conf *fconf) target = px->table; if (!target) { - ha_alert("Proxy %s : unable to find table '%s' referenced by bwlim filter '%s'", + ha_alert("Proxy %s : unable to find table '%s' referenced by bwlim filter '%s'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } if ((conf->flags & BWLIM_FL_IN) && !target->data_ofs[STKTABLE_DT_BYTES_IN_RATE]) { ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'." - " It must be 'bytes_in_rate'", + " It must be 'bytes_in_rate'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } else if ((conf->flags & BWLIM_FL_OUT) && !target->data_ofs[STKTABLE_DT_BYTES_OUT_RATE]) { ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'." - " It must be 'bytes_out_rate'", + " It must be 'bytes_out_rate'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } if (!stktable_compatible_sample(conf->expr, target->type)) { - ha_alert("Proxy %s : stick-table '%s' uses a key type incompatible with bwlim filter '%s'", + ha_alert("Proxy %s : stick-table '%s' uses a key type incompatible with bwlim filter '%s'\n", px->id, conf->table.n ? 
conf->table.n : px->id, conf->name); return 1; } diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c index 30f9d2a..e601ff6 100644 --- a/src/flt_http_comp.c +++ b/src/flt_http_comp.c @@ -73,9 +73,9 @@ comp_flt_init(struct proxy *px, struct flt_conf *fconf) static int comp_flt_init_per_thread(struct proxy *px, struct flt_conf *fconf) { - if (b_alloc(&tmpbuf) == NULL) + if (b_alloc(&tmpbuf, DB_PERMANENT) == NULL) return -1; - if (b_alloc(&zbuf) == NULL) + if (b_alloc(&zbuf, DB_PERMANENT) == NULL) return -1; return 0; } diff --git a/src/flt_spoe.c b/src/flt_spoe.c index 43f6bd9..95930f1 100644 --- a/src/flt_spoe.c +++ b/src/flt_spoe.c @@ -249,7 +249,7 @@ static const char *spoe_appctx_state_str[SPOE_APPCTX_ST_END+1] = { static char * generate_pseudo_uuid() { - ha_generate_uuid(&trash); + ha_generate_uuid_v4(&trash); return my_strndup(trash.area, trash.data); } @@ -1131,7 +1131,6 @@ spoe_handle_healthcheck_response(char *frame, size_t size, char *err, int errlen static int spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz) { - struct stconn *sc = appctx_sc(appctx); int ret; uint32_t netint; @@ -1140,15 +1139,8 @@ spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz) netint = htonl(framesz); memcpy(buf, (char *)&netint, 4); ret = applet_putblk(appctx, buf, framesz+4); - if (ret <= 0) { - if (ret == -3 && b_is_null(&sc_ic(sc)->buf)) { - /* WT: is this still needed for the case ret==-3 ? 
*/ - sc_need_room(sc, 0); - return 1; /* retry */ - } - SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO; - return -1; /* error */ - } + if (ret <= 0) + return 1; /* retry */ return framesz; } @@ -1934,7 +1926,7 @@ spoe_handle_appctx(struct appctx *appctx) if (SPOE_APPCTX(appctx) == NULL) return; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -2860,21 +2852,19 @@ spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait) if (buf->size) return 1; - if (LIST_INLIST(&buffer_wait->list)) - LIST_DEL_INIT(&buffer_wait->list); + b_dequeue(buffer_wait); - if (b_alloc(buf)) + if (b_alloc(buf, DB_CHANNEL)) return 1; - LIST_APPEND(&th_ctx->buffer_wq, &buffer_wait->list); + b_requeue(DB_CHANNEL, buffer_wait); return 0; } static void spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait) { - if (LIST_INLIST(&buffer_wait->list)) - LIST_DEL_INIT(&buffer_wait->list); + b_dequeue(buffer_wait); /* Release the buffer if needed */ if (buf->size) { @@ -3022,7 +3012,7 @@ spoe_init(struct proxy *px, struct flt_conf *fconf) /* conf->agent_fe was already initialized during the config * parsing. Finish initialization. 
*/ - conf->agent_fe.last_change = ns_to_sec(now_ns); + conf->agent_fe.fe_counters.last_change = ns_to_sec(now_ns); conf->agent_fe.cap = PR_CAP_FE; conf->agent_fe.mode = PR_MODE_TCP; conf->agent_fe.maxconn = 0; diff --git a/src/frontend.c b/src/frontend.c index ad2e39e..3b3bcbb 100644 --- a/src/frontend.c +++ b/src/frontend.c @@ -55,7 +55,7 @@ int frontend_accept(struct stream *s) if ((fe->mode == PR_MODE_TCP || fe->mode == PR_MODE_HTTP) && (!LIST_ISEMPTY(&fe->loggers))) { - if (likely(!LIST_ISEMPTY(&fe->logformat))) { + if (likely(!lf_expr_isempty(&fe->logformat))) { /* we have the client ip */ if (s->logs.logwait & LW_CLIP) if (!(s->logs.logwait &= ~(LW_CLIP|LW_INIT))) @@ -252,7 +252,7 @@ smp_fetch_fe_req_rate(const struct arg *args, struct sample *smp, const char *kw smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->fe_req_per_sec); + smp->data.u.sint = read_freq_ctr(&px->fe_counters.req_per_sec); return 1; } @@ -272,7 +272,7 @@ smp_fetch_fe_sess_rate(const struct arg *args, struct sample *smp, const char *k smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->fe_sess_per_sec); + smp->data.u.sint = read_freq_ctr(&px->fe_counters.sess_per_sec); return 1; } diff --git a/src/guid.c b/src/guid.c new file mode 100644 index 0000000..f1365b6 --- /dev/null +++ b/src/guid.c @@ -0,0 +1,161 @@ +#include <haproxy/guid.h> + +#include <import/ebistree.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/proxy.h> +#include <haproxy/server-t.h> +#include <haproxy/tools.h> + +/* GUID global tree */ +struct eb_root guid_tree = EB_ROOT_UNIQUE; + +/* Initialize <guid> members. */ +void guid_init(struct guid_node *guid) +{ + guid->node.key = NULL; + guid->node.node.leaf_p = NULL; +} + +/* Insert <objt> into GUID global tree with key <uid>. Must only be called on + * thread isolation. On failure, <errmsg> will be allocated with an error + * description. 
Caller is responsible to free it. + * + * Returns 0 on success else non-zero. + */ +int guid_insert(enum obj_type *objt, const char *uid, char **errmsg) +{ + struct guid_node *guid = NULL; + struct guid_node *dup; + struct ebpt_node *node; + char *key = NULL; + char *dup_name = NULL; + + if (!guid_is_valid_fmt(uid, errmsg)) + goto err; + + switch (obj_type(objt)) { + case OBJ_TYPE_PROXY: + guid = &__objt_proxy(objt)->guid; + break; + + case OBJ_TYPE_LISTENER: + guid = &__objt_listener(objt)->guid; + break; + + case OBJ_TYPE_SERVER: + guid = &__objt_server(objt)->guid; + break; + + default: + /* No guid support for this objtype. */ + ABORT_NOW(); + return 0; + } + + key = strdup(uid); + if (!key) { + memprintf(errmsg, "key alloc failure"); + goto err; + } + + guid->node.key = key; + node = ebis_insert(&guid_tree, &guid->node); + if (node != &guid->node) { + dup = ebpt_entry(node, struct guid_node, node); + dup_name = guid_name(dup); + memprintf(errmsg, "duplicate entry with %s", dup_name); + goto err; + } + + guid->obj_type = objt; + return 0; + + err: + ha_free(&key); + ha_free(&dup_name); + return 1; +} + +/* Remove <guid> node from GUID global tree. Must only be called on thread + * isolation. Safe to call even if node is not currently stored. + */ +void guid_remove(struct guid_node *guid) +{ + ebpt_delete(&guid->node); + ha_free(&guid->node.key); +} + +/* Retrieve an instance from GUID global tree with key <uid>. + * + * Returns the GUID instance or NULL if key not found. + */ +struct guid_node *guid_lookup(const char *uid) +{ + struct ebpt_node *node = NULL; + struct guid_node *guid = NULL; + + node = ebis_lookup(&guid_tree, uid); + if (node) + guid = ebpt_entry(node, struct guid_node, node); + + return guid; +} + +/* Returns a boolean checking if <uid> respects GUID format. If <errmsg> is not + * NULL, it will be allocated with an error description in case of invalid + * format. 
+ */ +int guid_is_valid_fmt(const char *uid, char **errmsg) +{ + const size_t len = strlen(uid); + const char *c; + + if (!len || len > GUID_MAX_LEN) { + memprintf(errmsg, "invalid length"); + return 0; + } + + c = invalid_char(uid); + if (c) { + memprintf(errmsg, "invalid character '%c'", c[0]); + return 0; + } + + return 1; +} + +/* Generate a user-friendly description for the instance attached via <guid> + * node. The string is dynamically allocated and the caller is responsible to + * free it. + * + * Returns a pointer to the dynamically allocated message. + */ +char *guid_name(const struct guid_node *guid) +{ + char *msg = NULL; + struct proxy *px; + struct listener *l; + struct server *srv; + + switch (obj_type(guid->obj_type)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(guid->obj_type); + return memprintf(&msg, "%s %s", proxy_cap_str(px->cap), px->id); + + case OBJ_TYPE_LISTENER: + l = __objt_listener(guid->obj_type); + return memprintf(&msg, "listener %s (%s:%d)", + l->bind_conf->frontend->id, + l->bind_conf->file, l->bind_conf->line); + + case OBJ_TYPE_SERVER: + srv = __objt_server(guid->obj_type); + return memprintf(&msg, "server %s/%s", srv->proxy->id, srv->id); + + default: + break; + } + + return NULL; +} @@ -183,11 +183,11 @@ int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value) * is hast header, its value is normalized. 0 is returned on success, -1 if the * authority is invalid and -2 if the host is invalid. 
*/ -static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr) +static int h1_validate_connect_authority(struct ist scheme, struct ist authority, struct ist *host_hdr) { struct ist uri_host, uri_port, host, host_port; - if (!isttest(authority)) + if (isttest(scheme) || !isttest(authority)) goto invalid_authority; uri_host = authority; uri_port = http_get_host_port(authority); @@ -575,12 +575,7 @@ int h1_headers_to_hdr_list(char *start, const char *stop, #ifdef HA_UNALIGNED_LE /* speedup: skip bytes not between 0x24 and 0x7e inclusive */ while (ptr <= end - sizeof(int)) { - int x = *(int *)ptr - 0x24242424; - if (x & 0x80808080) - break; - - x -= 0x5b5b5b5b; - if (!(x & 0x80808080)) + if (is_char4_outside(*(uint *)ptr, 0x24, 0x7e)) break; ptr += sizeof(int); @@ -930,14 +925,14 @@ int h1_headers_to_hdr_list(char *start, const char *stop, */ #ifdef HA_UNALIGNED_LE64 while (ptr <= end - sizeof(long)) { - if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) + if (is_char8_below_opt(*(ulong *)ptr, 0x0e)) goto http_msg_hdr_val2; ptr += sizeof(long); } #endif #ifdef HA_UNALIGNED_LE while (ptr <= end - sizeof(int)) { - if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) + if (is_char4_below_opt(*(uint *)ptr, 0x0e)) goto http_msg_hdr_val2; ptr += sizeof(int); } @@ -1105,46 +1100,88 @@ int h1_headers_to_hdr_list(char *start, const char *stop, if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) { struct http_uri_parser parser = http_uri_parser_init(sl.rq.u); - struct ist scheme, authority; + struct ist scheme, authority = IST_NULL; int ret; - scheme = http_parse_scheme(&parser); - authority = http_parse_authority(&parser, 1); - if (sl.rq.meth == HTTP_METH_CONNECT) { - struct ist *host = ((host_idx != -1) ? 
&hdr[host_idx].v : NULL); - - ret = h1_validate_connect_authority(authority, host); - if (ret < 0) { - if (h1m->err_pos < -1) { - state = H1_MSG_LAST_LF; - /* WT: gcc seems to see a path where sl.rq.u.ptr was used - * uninitialized, but it doesn't know that the function is - * called with initial states making this impossible. - */ - ALREADY_CHECKED(sl.rq.u.ptr); - ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ - goto http_msg_invalid; - } - if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + switch (parser.format) { + case URI_PARSER_FORMAT_ASTERISK: + /* We must take care "PRI * HTTP/2.0" is supported here. check for OTHER methods here is enough */ + if ((sl.rq.meth != HTTP_METH_OTHER && sl.rq.meth != HTTP_METH_OPTIONS) || istlen(sl.rq.u) != 1) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; } - } - else if (host_idx != -1 && istlen(authority)) { - struct ist host = hdr[host_idx].v; + break; + + case URI_PARSER_FORMAT_ABSPATH: + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + break; - /* For non-CONNECT method, the authority must match the host header value */ - if (!isteqi(authority, host)) { - ret = h1_validate_mismatch_authority(scheme, authority, host); + case URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY: + scheme = http_parse_scheme(&parser); + if (!isttest(scheme)) { /* scheme not found: MUST be an authority */ + struct ist *host = NULL; + + if (sl.rq.meth != HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + if (host_idx != -1) + host = &hdr[host_idx].v; + authority = 
http_parse_authority(&parser, 1); + ret = h1_validate_connect_authority(scheme, authority, host); if (ret < 0) { if (h1m->err_pos < -1) { state = H1_MSG_LAST_LF; - ptr = host.ptr; /* Set ptr on the error */ + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ goto http_msg_invalid; } if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ + } + } + else { /* Scheme found: MUST be an absolute-URI */ + struct ist host = IST_NULL; + + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + + if (host_idx != -1) + host = hdr[host_idx].v; + authority = http_parse_authority(&parser, 1); + /* For non-CONNECT method, the authority must match the host header value */ + if (isttest(host) && !isteqi(authority, host)) { + ret = h1_validate_mismatch_authority(scheme, authority, host); + if (ret < 0) { + if (h1m->err_pos < -1) { + state = H1_MSG_LAST_LF; + ptr = host.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + if (h1m->err_pos == -1) /* capture the error pointer */ + h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + } } } + break; + + default: + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; } } @@ -1227,57 +1264,6 @@ int h1_headers_to_hdr_list(char *start, const char *stop, goto try_again; } -/* This function performs a very minimal parsing of the trailers block present - * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of - * bytes to delete to skip the trailers. 
It may return 0 if it's missing some - * input data, or < 0 in case of parse error (in which case the caller may have - * to decide how to proceed, possibly eating everything). - */ -int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max) -{ - const char *stop = b_peek(buf, ofs + max); - int count = ofs; - - while (1) { - const char *p1 = NULL, *p2 = NULL; - const char *start = b_peek(buf, count); - const char *ptr = start; - - /* scan current line and stop at LF or CRLF */ - while (1) { - if (ptr == stop) - return 0; - - if (*ptr == '\n') { - if (!p1) - p1 = ptr; - p2 = ptr; - break; - } - - if (*ptr == '\r') { - if (p1) - return -1; - p1 = ptr; - } - - ptr = b_next(buf, ptr); - } - - /* after LF; point to beginning of next line */ - p2 = b_next(buf, p2); - count += b_dist(buf, start, p2); - - /* LF/CRLF at beginning of line => end of trailers at p2. - * Everything was scheduled for forwarding, there's nothing left - * from this message. */ - if (p1 == start) - break; - /* OK, next line then */ - } - return count - ofs; -} - /* Generate a random key for a WebSocket Handshake in respect with rfc6455 * The key is 128-bits long encoded as a base64 string in <key_out> parameter * (25 bytes long). 
@@ -36,8 +36,8 @@ #include <haproxy/qmux_http.h> #include <haproxy/qpack-dec.h> #include <haproxy/qpack-enc.h> -#include <haproxy/quic_conn-t.h> #include <haproxy/quic_enc.h> +#include <haproxy/quic_fctl.h> #include <haproxy/quic_frame.h> #include <haproxy/stats-t.h> #include <haproxy/tools.h> @@ -58,19 +58,21 @@ static const struct trace_event h3_trace_events[] = { { .mask = H3_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H3 HEADERS frame" }, #define H3_EV_RX_SETTINGS (1ULL << 3) { .mask = H3_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H3 SETTINGS frame" }, -#define H3_EV_TX_DATA (1ULL << 4) +#define H3_EV_TX_FRAME (1ULL << 4) + { .mask = H3_EV_TX_FRAME, .name = "tx_frame", .desc = "transmission of any H3 frame" }, +#define H3_EV_TX_DATA (1ULL << 5) { .mask = H3_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H3 DATA frame" }, -#define H3_EV_TX_HDR (1ULL << 5) +#define H3_EV_TX_HDR (1ULL << 6) { .mask = H3_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H3 HEADERS frame" }, -#define H3_EV_TX_SETTINGS (1ULL << 6) +#define H3_EV_TX_SETTINGS (1ULL << 7) { .mask = H3_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H3 SETTINGS frame" }, -#define H3_EV_H3S_NEW (1ULL << 7) +#define H3_EV_H3S_NEW (1ULL << 8) { .mask = H3_EV_H3S_NEW, .name = "h3s_new", .desc = "new H3 stream" }, -#define H3_EV_H3S_END (1ULL << 8) +#define H3_EV_H3S_END (1ULL << 9) { .mask = H3_EV_H3S_END, .name = "h3s_end", .desc = "H3 stream terminated" }, -#define H3_EV_H3C_NEW (1ULL << 9) +#define H3_EV_H3C_NEW (1ULL << 10) { .mask = H3_EV_H3C_NEW, .name = "h3c_new", .desc = "new H3 connection" }, -#define H3_EV_H3C_END (1ULL << 10) +#define H3_EV_H3C_END (1ULL << 11) { .mask = H3_EV_H3C_END, .name = "h3c_end", .desc = "H3 connection terminated" }, #define H3_EV_STRM_SEND (1ULL << 12) { .mask = H3_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" }, @@ -129,7 +131,7 @@ static uint64_t h3_settings_max_field_section_size = 
QUIC_VARINT_8_BYTE_MAX; /* struct h3c { struct qcc *qcc; struct qcs *ctrl_strm; /* Control stream */ - enum h3_err err; + int err; uint32_t flags; /* Settings */ @@ -195,7 +197,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_CTRL: if (h3c->flags & H3_CF_UNI_CTRL_SET) { TRACE_ERROR("duplicated control stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_CTRL_SET; @@ -210,7 +213,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_QPACK_DEC: if (h3c->flags & H3_CF_UNI_QPACK_DEC_SET) { TRACE_ERROR("duplicated qpack decoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_QPACK_DEC_SET; @@ -221,7 +225,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_QPACK_ENC: if (h3c->flags & H3_CF_UNI_QPACK_ENC_SET) { TRACE_ERROR("duplicated qpack encoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_QPACK_ENC_SET; @@ -320,7 +325,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype /* cf H3_FT_HEADERS case. */ if (h3s->type == H3S_T_CTRL || (h3s->st_req != H3S_ST_REQ_HEADERS && h3s->st_req != H3S_ST_REQ_DATA)) { - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; } break; @@ -347,7 +352,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * own rules; see Section 9. 
*/ if (h3s->type == H3S_T_CTRL || h3s->st_req == H3S_ST_REQ_TRAILERS) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; case H3_FT_CANCEL_PUSH: @@ -374,9 +379,9 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype */ if (h3s->type != H3S_T_CTRL) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; else if (!(h3c->flags & H3_CF_SETTINGS_RECV)) - ret = H3_MISSING_SETTINGS; + ret = H3_ERR_MISSING_SETTINGS; break; case H3_FT_SETTINGS: @@ -394,7 +399,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * H3_FRAME_UNEXPECTED. */ if (h3s->type != H3S_T_CTRL || h3c->flags & H3_CF_SETTINGS_RECV) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; case H3_FT_PUSH_PROMISE: @@ -406,7 +411,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype */ /* TODO server-side only. */ - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; default: @@ -420,7 +425,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * not satisfy that requirement and SHOULD be treated as an error. */ if (h3s->type == H3S_T_CTRL && !(h3c->flags & H3_CF_SETTINGS_RECV)) - ret = H3_MISSING_SETTINGS; + ret = H3_ERR_MISSING_SETTINGS; break; } @@ -461,7 +466,8 @@ static int h3_check_body_size(struct qcs *qcs, int fin) if (h3s->data_len > h3s->body_len || (fin && h3s->data_len < h3s->body_len)) { TRACE_ERROR("Content-length does not match DATA frame size", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(qcs->qcc, 1); ret = -1; } @@ -527,6 +533,7 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, int cookie = -1, last_cookie = -1, i; const char *ctl; int relaxed = !!(h3c->qcc->proxy->options2 & PR_O2_REQBUG_OK); + int qpack_err; /* RFC 9114 4.1.2. 
Malformed Requests and Responses * @@ -558,14 +565,16 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, list, sizeof(list) / sizeof(list[0])); if (ret < 0) { TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = -ret; + if ((qpack_err = qpack_err_decode(ret)) >= 0) { + h3c->err = qpack_err; + qcc_report_glitch(qcs->qcc, 1); + } len = -1; goto out; } - if (!qcs_get_buf(qcs, &htx_buf)) { + if (!b_alloc(&htx_buf, DB_SE_RX)) { TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -604,7 +613,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in pseudo-header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -615,7 +625,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isteq(list[hdr_idx].n, ist(":method"))) { if (isttest(meth)) { TRACE_ERROR("duplicated method pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -624,7 +635,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, else if (isteq(list[hdr_idx].n, ist(":path"))) { if (isttest(path)) { TRACE_ERROR("duplicated path pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -636,7 +648,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_range(list[hdr_idx].v, 0, 
'#'); if (unlikely(ctl) && http_path_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("forbidden character in ':path' pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -648,7 +661,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isttest(scheme)) { /* duplicated pseudo-header */ TRACE_ERROR("duplicated scheme pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -657,20 +671,23 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, else if (isteq(list[hdr_idx].n, ist(":authority"))) { if (isttest(authority)) { TRACE_ERROR("duplicated authority pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) { - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } } else { TRACE_ERROR("unknown pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -687,7 +704,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (!isttest(meth) || !isttest(scheme) || !isttest(path)) { TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -698,7 +716,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, sl = htx_add_stline(htx, HTX_BLK_REQ_SL, 
flags, meth, path, ist("HTTP/3.0")); if (!sl) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -710,7 +727,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isttest(authority)) { if (!htx_add_header(htx, ist("host"), authority)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -723,7 +739,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (istmatch(list[hdr_idx].n, ist(":"))) { TRACE_ERROR("pseudo-header field after fields", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -732,7 +749,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, const char c = list[hdr_idx].n.ptr[i]; if ((uint8_t)(c - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(c)) { TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -753,14 +771,16 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (isteq(list[hdr_idx].n, ist("host"))) { if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) { - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -776,7 +796,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, h3s->flags & H3_SF_HAVE_CLEN); if (ret < 0) { TRACE_ERROR("invalid content-length", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, 
qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -810,7 +831,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, * connection-specific fields MUST be treated as malformed. */ TRACE_ERROR("invalid connection header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -823,13 +845,13 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, * NOT contain any value other than "trailers". */ TRACE_ERROR("invalid te header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (!htx_add_header(htx, list[hdr_idx].n, list[hdr_idx].v)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -845,21 +867,20 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (!isttest(authority)) { TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (cookie >= 0) { if (http_cookie_merge(htx, list, cookie)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } } if (!htx_add_endof(htx, HTX_BLK_EOH)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -871,7 +892,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, htx = NULL; if (!qcs_attach_sc(qcs, &htx_buf, fin)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -927,6 +947,7 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, struct http_hdr list[global.tune.max_http_hdr]; int hdr_idx, ret; const char *ctl; + int qpack_err; int i; TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, 
qcs); @@ -937,14 +958,16 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, list, sizeof(list) / sizeof(list[0])); if (ret < 0) { TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = -ret; + if ((qpack_err = qpack_err_decode(ret)) >= 0) { + h3c->err = qpack_err; + qcc_report_glitch(qcs->qcc, 1); + } len = -1; goto out; } - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -978,7 +1001,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (istmatch(list[hdr_idx].n, ist(":"))) { TRACE_ERROR("pseudo-header field in trailers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -987,7 +1011,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, const char c = list[hdr_idx].n.ptr[i]; if ((uint8_t)(c - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(c)) { TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -1002,7 +1027,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, isteq(list[hdr_idx].n, ist("te")) || isteq(list[hdr_idx].n, ist("transfer-encoding"))) { TRACE_ERROR("forbidden HTTP/3 headers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -1021,14 +1047,14 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && 
http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in trailer value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (!htx_add_trailer(htx, list[hdr_idx].n, list[hdr_idx].v)) { TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -1038,7 +1064,6 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, if (!htx_add_endof(htx, HTX_BLK_EOT)) { TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -1064,8 +1089,6 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, uint64_t len, char fin) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; struct buffer *appbuf; struct htx *htx = NULL; size_t htx_sent = 0; @@ -1074,11 +1097,9 @@ static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - len = -1; - goto out; + goto err; } htx = htx_from_buf(appbuf); @@ -1129,6 +1150,10 @@ static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); return htx_sent; + + err: + TRACE_DEVEL("leaving on error", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); + return -1; } /* Parse a SETTINGS frame of length <len> of payload <buf>. 
@@ -1150,7 +1175,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, while (b_data(&b)) { if (!b_quic_dec_int(&id, &b, &ret) || !b_quic_dec_int(&value, &b, &ret)) { - h3c->err = H3_FRAME_ERROR; + h3c->err = H3_ERR_FRAME_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; } @@ -1167,7 +1193,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, /* Ignore duplicate check for ID too big used for GREASE. */ if (id < sizeof(mask)) { if (ha_bit_test(id, &mask)) { - h3c->err = H3_SETTINGS_ERROR; + h3c->err = H3_ERR_SETTINGS_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; } ha_bit_set(id, &mask); @@ -1197,7 +1224,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, * their receipt MUST be treated as a connection error of type * H3_SETTINGS_ERROR. */ - h3c->err = H3_SETTINGS_ERROR; + h3c->err = H3_ERR_SETTINGS_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; default: /* MUST be ignored */ @@ -1209,16 +1237,16 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, return ret; } -/* Decode <qcs> remotely initiated bidi-stream. <fin> must be set to indicate - * that we received the last data of the stream. +/* Transcode HTTP/3 payload received in buffer <b> to HTX data for stream + * <qcs>. If <fin> is set, it indicates that no more data will arrive after. * * Returns 0 on success else non-zero. 
*/ -static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) +static ssize_t h3_rcv_buf(struct qcs *qcs, struct buffer *b, int fin) { struct h3s *h3s = qcs->ctx; struct h3c *h3c = h3s->h3c; - ssize_t total = 0, ret; + ssize_t total = 0, ret = 0; TRACE_ENTER(H3_EV_RX_FRAME, qcs->qcc->conn, qcs); @@ -1256,31 +1284,36 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) */ if (h3s->type == H3S_T_CTRL && fin) { TRACE_ERROR("control stream closed by remote peer", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } if (!b_data(b) && fin && quic_stream_is_bidi(qcs->id)) { struct buffer *appbuf; struct htx *htx; + int eom; TRACE_PROTO("received FIN without data", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); goto err; } htx = htx_from_buf(appbuf); - if (!htx_set_eom(htx)) { + eom = htx_set_eom(htx); + htx_to_buf(htx, appbuf); + if (!eom) { TRACE_ERROR("cannot set EOM", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); + goto err; } - htx_to_buf(htx, appbuf); + goto done; } - while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL) && !h3c->err && !h3s->err) { + while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL) && ret >= 0) { uint64_t ftype, flen; char last_stream_frame = 0; @@ -1307,6 +1340,7 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) if ((ret = h3_check_frame_valid(h3c, qcs, ftype))) { TRACE_ERROR("received an invalid frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); qcc_set_error(qcs->qcc, ret, 1); + qcc_report_glitch(qcs->qcc, 1); goto 
err; } @@ -1329,7 +1363,8 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) */ if (flen > QC_S_RX_BUF_SZ) { TRACE_ERROR("received a too big frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_EXCESSIVE_LOAD, 1); + qcc_set_error(qcs->qcc, H3_ERR_EXCESSIVE_LOAD, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } break; @@ -1405,6 +1440,10 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) qcc_set_error(qcs->qcc, h3c->err, 1); return b_data(b); } + else if (unlikely(ret < 0)) { + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); + goto err; + } /* TODO may be useful to wakeup the MUX if blocked due to full buffer. * However, currently, io-cb of MUX does not handle Rx. @@ -1419,17 +1458,6 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) return -1; } -/* Returns buffer for data sending. - * May be NULL if the allocation failed. - */ -static struct buffer *mux_get_buf(struct qcs *qcs) -{ - if (!b_size(&qcs->tx.buf)) - b_alloc(&qcs->tx.buf); - - return &qcs->tx.buf; -} - /* Function used to emit stream data from <qcs> control uni-stream. * * On success return the number of sent bytes. 
A negative code is used on @@ -1437,13 +1465,14 @@ static struct buffer *mux_get_buf(struct qcs *qcs) */ static int h3_control_send(struct qcs *qcs, void *ctx) { + int err; int ret; struct h3c *h3c = ctx; unsigned char data[(2 + 3) * 2 * QUIC_VARINT_MAX_SIZE]; /* enough for 3 settings */ struct buffer pos, *res; size_t frm_len; - TRACE_ENTER(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); BUG_ON_HOT(h3c->flags & H3_CF_SETTINGS_SENT); @@ -1472,9 +1501,14 @@ static int h3_control_send(struct qcs *qcs, void *ctx) b_quic_enc_int(&pos, h3_settings_max_field_section_size, 0); } - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + if (qfctl_sblocked(&qcs->tx.fc) || qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_ERROR("not enough initial credit for control stream", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + goto err; + } + + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + /* Consider alloc failure fatal for control stream even on conn buf limit. */ + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); goto err; } @@ -1487,22 +1521,21 @@ static int h3_control_send(struct qcs *qcs, void *ctx) ret = b_force_xfer(res, &pos, b_data(&pos)); if (ret > 0) { /* Register qcs for sending before other streams. 
*/ - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, ret); h3c->flags |= H3_CF_SETTINGS_SENT; } - TRACE_LEAVE(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); return -1; } static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer outbuf; struct buffer headers_buf = BUF_NULL; struct buffer *res; @@ -1515,7 +1548,7 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) int hdr; int status = 0; - TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); sl = NULL; hdr = 0; @@ -1537,8 +1570,7 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) } else if (type == HTX_BLK_HDR) { if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) { - TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("too many headers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } list[hdr].n = htx_get_blk_name(htx, blk); @@ -1555,11 +1587,14 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) list[hdr].n = ist(""); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto err; + } + + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } /* At least 5 bytes to store frame type + length as a varint max size */ @@ -1571,11 +1606,12 @@ static int 
h3_resp_headers_send(struct qcs *qcs, struct htx *htx) /* Start the headers after frame type + length */ headers_buf = b_make(b_head(res) + 5, b_size(res) - 5, 0, 0); + TRACE_DATA("encoding HEADERS frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, + qcs->qcc->conn, qcs); if (qpack_encode_field_section_line(&headers_buf)) ABORT_NOW(); if (qpack_encode_int_status(&headers_buf, status)) { - TRACE_ERROR("invalid status code", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("invalid status code", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } @@ -1630,11 +1666,12 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) break; } - TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + end: + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return -1; } @@ -1646,12 +1683,12 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) * Caller is responsible to emit an empty QUIC STREAM frame to signal the end * of the stream. * - * Returns the size of HTX blocks removed. + * Returns the size of HTX blocks removed. A negative error code is returned in + * case of a fatal error which should caused a connection closure. 
*/ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer headers_buf = BUF_NULL; struct buffer *res; struct http_hdr list[global.tune.max_http_hdr]; @@ -1661,7 +1698,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) int ret = 0; int hdr; - TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); hdr = 0; for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) { @@ -1675,8 +1712,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) if (type == HTX_BLK_TLR) { if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) { - TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("too many headers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } list[hdr].n = htx_get_blk_name(htx, blk); @@ -1684,8 +1720,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) hdr++; } else { - TRACE_ERROR("unexpected HTX block", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("unexpected HTX block", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } } @@ -1694,22 +1729,41 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) /* No headers encoded here so no need to generate a H3 HEADERS * frame. Mux will send an empty QUIC STREAM frame with FIN. */ - TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DATA("skipping trailer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + + /* Truncate UNUSED / EOT HTX blocks. 
*/ + blk = htx_get_head_blk(htx); + while (blk) { + type = htx_get_blk_type(blk); + ret += htx_get_blksz(blk); + blk = htx_remove_blk(htx, blk); + if (type == HTX_BLK_EOT) + break; + } goto end; } + list[hdr].n = ist(""); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + start: + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto err; + } + + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } /* At least 9 bytes to store frame type + length as a varint max size */ if (b_room(res) < 9) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for trailers frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. */ + goto start; } /* Force buffer realignment as size required to encode headers is unknown. */ @@ -1719,8 +1773,12 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) headers_buf = b_make(b_peek(res, b_data(res) + 9), b_contig_space(res) - 9, 0, 0); if (qpack_encode_field_section_line(&headers_buf)) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for trailers section line", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. 
*/ + goto start; } tail = b_tail(&headers_buf); @@ -1740,8 +1798,12 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) } if (qpack_encode_header(&headers_buf, list[hdr].n, list[hdr].v)) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for all trailers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. */ + goto start; } } @@ -1750,20 +1812,21 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) /* No headers encoded here so no need to generate a H3 HEADERS * frame. Mux will send an empty QUIC STREAM frame with FIN. */ - TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - goto end; + TRACE_DATA("skipping trailer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + } + else { + /* Now that all headers are encoded, we are certain that res + * buffer is big enough. + */ + TRACE_DATA("encoding TRAILERS frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, + qcs->qcc->conn, qcs); + b_putchr(res, 0x01); /* h3 HEADERS frame type */ + if (!b_quic_enc_int(res, b_data(&headers_buf), 8)) + ABORT_NOW(); + b_add(res, b_data(&headers_buf)); } - /* Now that all headers are encoded, we are certain that res buffer is - * big enough. - */ - b_putchr(res, 0x01); /* h3 HEADERS frame type */ - if (!b_quic_enc_int(res, b_data(&headers_buf), 8)) - ABORT_NOW(); - b_add(res, b_data(&headers_buf)); - - end: - ret = 0; + /* Encoding success, truncate HTX blocks until EOT. 
*/ blk = htx_get_head_blk(htx); while (blk) { type = htx_get_blk_type(blk); @@ -1773,23 +1836,28 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) break; } - TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + end: + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return -1; } -/* Returns the total of bytes sent. This corresponds to the +/* Convert a series of HTX data blocks from <htx> buffer of size <count> into + * HTTP/3 frames encoded into <qcs> Tx buffer. The caller must also specify the + * underlying HTX area via <buf> as this will be used if zero-copy can be + * performed. + * + * Returns the total bytes of encoded HTTP/3 payload. This corresponds to the * total bytes of HTX block removed. A negative error code is returned in case * of a fatal error which should caused a connection closure. 
*/ -static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) +static int h3_resp_data_send(struct qcs *qcs, struct htx *htx, + struct buffer *buf, size_t count) { - struct htx *htx; - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer outbuf; struct buffer *res; size_t total = 0; @@ -1797,9 +1865,7 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) struct htx_blk *blk; enum htx_blk_type type; - TRACE_ENTER(H3_EV_TX_DATA, qcs->qcc->conn, qcs); - - htx = htx_from_buf(buf); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); new_frame: if (!count || htx_is_empty(htx)) @@ -1815,31 +1881,43 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) if (type != HTX_BLK_DATA) goto end; - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_DATA, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + goto err; + } + + /* Connection buf limit reached, stconn will subscribe on SEND. */ + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } + /* If HTX contains only one DATA block, try to exchange it with MUX + * buffer to perform zero-copy. This is only achievable if MUX buffer + * is currently empty. + */ if (unlikely(fsize == count && - !b_data(res) && - htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) { + !b_data(res) && + htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) { void *old_area = res->area; - /* map an H2 frame to the HTX block so that we can put the - * frame header there. 
- */ - *res = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - hsize, fsize + hsize); - outbuf = b_make(b_head(res), hsize, 0, 0); - b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */ - b_quic_enc_int(&outbuf, fsize, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ + TRACE_DATA("perform zero-copy DATA transfer", + H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + + /* remap MUX buffer to HTX area, keep an offset for H3 header. */ + *res = b_make(buf->area, buf->size, + sizeof(struct htx) + blk->addr - hsize, 0); + + /* write H3 header frame before old HTX block. */ + b_putchr(res, 0x00); /* h3 frame type = DATA */ + b_quic_enc_int(res, fsize, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ + b_add(res, fsize); - /* and exchange with our old area */ + /* assign old MUX area to HTX buffer. */ buf->area = old_area; buf->data = buf->head = 0; total += fsize; - fsize = 0; + goto end; } @@ -1851,23 +1929,29 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0); if (b_size(&outbuf) > hsize || !b_space_wraps(res)) break; - b_slow_realign(res, trash.area, b_data(res)); + if (qcc_realign_stream_txbuf(qcs, res)) + break; } - /* Not enough room for headers and at least one data byte, block the - * stream. It is expected that the stream connector layer will subscribe - * on SEND. + /* Not enough room for headers and at least one data byte, try to + * release the current buffer and allocate a new one. If not possible, + * stconn layer will subscribe on SEND. */ if (b_size(&outbuf) <= hsize) { - TRACE_STATE("not enough room for data frame", H3_EV_TX_DATA, qcs->qcc->conn, qcs); - qcs->flags |= QC_SF_BLK_MROOM; - goto end; + TRACE_STATE("not enough room for data frame", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. 
*/ + goto new_frame; } if (b_size(&outbuf) < hsize + fsize) fsize = b_size(&outbuf) - hsize; BUG_ON(fsize <= 0); + TRACE_DATA("encoding DATA frame", H3_EV_TX_FRAME|H3_EV_TX_DATA, + qcs->qcc->conn, qcs); b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */ b_quic_enc_int(&outbuf, fsize, 0); /* h3 frame length */ @@ -1885,19 +1969,17 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) goto new_frame; end: - TRACE_LEAVE(H3_EV_TX_DATA, qcs->qcc->conn, qcs); + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); return total; err: BUG_ON(total); /* Must return HTX removed size if at least on frame encoded. */ - TRACE_DEVEL("leaving on error", H3_EV_TX_DATA, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); return -1; } static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; size_t total = 0; enum htx_blk_type btype; struct htx *htx; @@ -1910,12 +1992,7 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) htx = htx_from_buf(buf); - if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) - qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; - - while (count && !htx_is_empty(htx) && - !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err) { - + while (count && !htx_is_empty(htx) && qcc_stream_can_send(qcs) && ret >= 0) { idx = htx_get_head(htx); blk = htx_get_blk(htx, idx); btype = htx_get_blk_type(blk); @@ -1937,9 +2014,11 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) break; case HTX_BLK_DATA: - ret = h3_resp_data_send(qcs, buf, count); + ret = h3_resp_data_send(qcs, htx, buf, count); if (ret > 0) { + /* Reload HTX. This is necessary if 0-copy was performed. 
*/ htx = htx_from_buf(buf); + total += ret; count -= ret; if (ret < bsize) @@ -1964,16 +2043,11 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) count -= bsize; break; } - - /* If an error occured, either buffer space or connection error - * must be set to break current loop. - */ - BUG_ON(ret < 0 && !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err); } - /* Interrupt sending on connection error. */ - if (unlikely(h3c->err)) { - qcc_set_error(qcs->qcc, h3c->err, 1); + /* Interrupt sending on fatal error. */ + if (unlikely(ret < 0)) { + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); goto out; } @@ -1998,7 +2072,7 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) /* Generate a STOP_SENDING if full response transferred before * receiving the full request. */ - qcs->err = H3_NO_ERROR; + qcs->err = H3_ERR_NO_ERROR; qcc_abort_stream_read(qcs); } @@ -2011,15 +2085,21 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) static size_t h3_nego_ff(struct qcs *qcs, size_t count) { + int err; struct buffer *res; int hsize; size_t sz, ret = 0; TRACE_ENTER(H3_EV_STRM_SEND, qcs->qcc->conn, qcs); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; + start: + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto end; + } + + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; goto end; } @@ -2028,7 +2108,8 @@ static size_t h3_nego_ff(struct qcs *qcs, size_t count) while (1) { if (b_contig_space(res) >= hsize || !b_space_wraps(res)) break; - b_slow_realign(res, trash.area, b_data(res)); + if (qcc_realign_stream_txbuf(qcs, res)) + break; } /* Not enough room for headers and at least one data byte, block the @@ -2036,9 +2117,13 @@ static size_t h3_nego_ff(struct qcs *qcs, size_t count) * on SEND. 
*/ if (b_contig_space(res) <= hsize) { - qcs->flags |= QC_SF_BLK_MROOM; - qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; - goto end; + if (qcc_release_stream_txbuf(qcs)) { + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + /* Buffer released, restart processing. */ + goto start; } /* Cannot forward more than available room in output buffer */ @@ -2064,6 +2149,8 @@ static size_t h3_done_ff(struct qcs *qcs) h3_debug_printf(stderr, "%s\n", __func__); if (qcs->sd->iobuf.data) { + TRACE_DATA("encoding DATA frame (fast forward)", + H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); b_sub(qcs->sd->iobuf.buf, qcs->sd->iobuf.data); b_putchr(qcs->sd->iobuf.buf, 0x00); /* h3 frame type = DATA */ b_quic_enc_int(qcs->sd->iobuf.buf, qcs->sd->iobuf.data, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ @@ -2105,7 +2192,8 @@ static int h3_close(struct qcs *qcs, enum qcc_app_ops_close_side side) */ if (qcs == h3c->ctrl_strm || h3s->type == H3S_T_CTRL) { TRACE_ERROR("closure detected on control stream", H3_EV_H3S_END, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); + qcc_report_glitch(qcs->qcc, 1); return 1; } @@ -2136,7 +2224,7 @@ static int h3_attach(struct qcs *qcs, void *conn_ctx) */ TRACE_STATE("reject stream higher than goaway", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); qcc_abort_stream_read(qcs); - qcc_reset_stream(qcs, H3_REQUEST_REJECTED); + qcc_reset_stream(qcs, H3_ERR_REQUEST_REJECTED); goto done; } @@ -2187,47 +2275,18 @@ static void h3_detach(struct qcs *qcs) TRACE_LEAVE(H3_EV_H3S_END, qcs->qcc->conn, qcs); } -/* Initialize H3 control stream and prepare SETTINGS emission. - * - * Returns 0 on success else non-zero. 
- */ -static int h3_finalize(void *ctx) -{ - struct h3c *h3c = ctx; - struct qcc *qcc = h3c->qcc; - struct qcs *qcs; - - TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); - - qcs = qcc_init_stream_local(h3c->qcc, 0); - if (!qcs) { - TRACE_ERROR("cannot init control stream", H3_EV_H3C_NEW, qcc->conn); - goto err; - } - - h3c->ctrl_strm = qcs; - - if (h3_control_send(qcs, h3c) < 0) - goto err; - - TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn); - return 0; - - err: - TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); - return 1; -} - /* Generate a GOAWAY frame for <h3c> connection on the control stream. * * Returns 0 on success else non-zero. */ static int h3_send_goaway(struct h3c *h3c) { + int err; struct qcs *qcs = h3c->ctrl_strm; struct buffer pos, *res; unsigned char data[3 * QUIC_VARINT_MAX_SIZE]; size_t frm_len = quic_int_getsize(h3c->id_goaway); + size_t xfer; TRACE_ENTER(H3_EV_H3C_END, h3c->qcc->conn); @@ -2242,15 +2301,16 @@ static int h3_send_goaway(struct h3c *h3c) b_quic_enc_int(&pos, frm_len, 0); b_quic_enc_int(&pos, h3c->id_goaway, 0); - res = mux_get_buf(qcs); - if (b_is_null(res) || b_room(res) < b_data(&pos)) { - /* Do not try forcefully to emit GOAWAY if no space left. */ + res = qcc_get_stream_txbuf(qcs, &err); + if (!res || b_room(res) < b_data(&pos) || + qfctl_sblocked(&qcs->tx.fc) || qfctl_sblocked(&h3c->qcc->tx.fc)) { + /* Do not try forcefully to emit GOAWAY if no buffer available or not enough space left. 
*/ TRACE_ERROR("cannot send GOAWAY", H3_EV_H3C_END, h3c->qcc->conn, qcs); goto err; } - b_force_xfer(res, &pos, b_data(&pos)); - qcc_send_stream(qcs, 1); + xfer = b_force_xfer(res, &pos, b_data(&pos)); + qcc_send_stream(qcs, 1, xfer); h3c->flags |= H3_CF_GOAWAY_SENT; TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn); @@ -2271,7 +2331,7 @@ static int h3_send_goaway(struct h3c *h3c) static int h3_init(struct qcc *qcc) { struct h3c *h3c; - struct quic_conn *qc = qcc->conn->handle.qc; + const struct listener *li = __objt_listener(qcc->conn->target); TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); @@ -2288,9 +2348,8 @@ static int h3_init(struct qcc *qcc) h3c->id_goaway = 0; qcc->ctx = h3c; - /* TODO cleanup only ref to quic_conn */ h3c->prx_counters = - EXTRA_COUNTERS_GET(qc->li->bind_conf->frontend->extra_counters_fe, + EXTRA_COUNTERS_GET(li->bind_conf->frontend->extra_counters_fe, &h3_stats_module); LIST_INIT(&h3c->buf_wait.list); @@ -2298,10 +2357,43 @@ static int h3_init(struct qcc *qcc) return 1; fail_no_h3: + qcc_set_error(qcc, H3_ERR_INTERNAL_ERROR, 1); TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); return 0; } +/* Initialize H3 control stream and prepare SETTINGS emission. + * + * Returns 0 on success else non-zero. + */ +static int h3_finalize(void *ctx) +{ + struct h3c *h3c = ctx; + struct qcc *qcc = h3c->qcc; + struct qcs *qcs; + + TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); + + qcs = qcc_init_stream_local(qcc, 0); + if (!qcs) { + TRACE_ERROR("cannot init control stream", H3_EV_H3C_NEW, qcc->conn); + goto err; + } + + h3c->ctrl_strm = qcs; + + if (h3_control_send(qcs, h3c) < 0) + goto err; + + TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn); + return 0; + + err: + qcc_set_error(qcc, H3_ERR_INTERNAL_ERROR, 1); + TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); + return 1; +} + /* Send a HTTP/3 GOAWAY followed by a CONNECTION_CLOSE_APP. 
*/ static void h3_shutdown(void *ctx) { @@ -2324,7 +2416,7 @@ static void h3_shutdown(void *ctx) * graceful shutdown SHOULD use the H3_NO_ERROR error code when closing * the connection. */ - h3c->qcc->err = quic_err_app(H3_NO_ERROR); + h3c->qcc->err = quic_err_app(H3_ERR_NO_ERROR); TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn); } @@ -2343,6 +2435,12 @@ static void h3_stats_inc_err_cnt(void *ctx, int err_code) h3_inc_err_cnt(h3c->prx_counters, err_code); } +static void h3_report_susp(void *ctx) +{ + struct h3c *h3c = ctx; + h3c->qcc->err = quic_err_app(H3_ERR_EXCESSIVE_LOAD); +} + static inline const char *h3_ft_str(uint64_t type) { switch (type) { @@ -2389,15 +2487,16 @@ static void h3_trace(enum trace_level level, uint64_t mask, /* HTTP/3 application layer operations */ const struct qcc_app_ops h3_ops = { .init = h3_init, + .finalize = h3_finalize, .attach = h3_attach, - .decode_qcs = h3_decode_qcs, + .rcv_buf = h3_rcv_buf, .snd_buf = h3_snd_buf, .nego_ff = h3_nego_ff, .done_ff = h3_done_ff, .close = h3_close, .detach = h3_detach, - .finalize = h3_finalize, .shutdown = h3_shutdown, .inc_err_cnt = h3_stats_inc_err_cnt, + .report_susp = h3_report_susp, .release = h3_release, }; diff --git a/src/h3_stats.c b/src/h3_stats.c index c96093f..48dac33 100644 --- a/src/h3_stats.c +++ b/src/h3_stats.c @@ -1,4 +1,5 @@ #include <haproxy/h3.h> +#include <haproxy/qpack-t.h> #include <haproxy/stats.h> enum { @@ -35,7 +36,7 @@ enum { H3_STATS_COUNT /* must be the last */ }; -static struct name_desc h3_stats[] = { +static struct stat_col h3_stats[] = { /* h3 frame type counters */ [H3_ST_DATA] = { .name = "h3_data", .desc = "Total number of DATA frames received" }, @@ -128,40 +129,114 @@ static struct h3_counters { long long qpack_decoder_stream_error; /* total number of QPACK_DECODER_STREAM_ERROR errors received */ } h3_counters; -static void h3_fill_stats(void *data, struct field *stats) +static int h3_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { 
struct h3_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? *selected_field : 0); - /* h3 frame type counters */ - stats[H3_ST_DATA] = mkf_u64(FN_COUNTER, counters->h3_data); - stats[H3_ST_HEADERS] = mkf_u64(FN_COUNTER, counters->h3_headers); - stats[H3_ST_CANCEL_PUSH] = mkf_u64(FN_COUNTER, counters->h3_cancel_push); - stats[H3_ST_PUSH_PROMISE] = mkf_u64(FN_COUNTER, counters->h3_push_promise); - stats[H3_ST_MAX_PUSH_ID] = mkf_u64(FN_COUNTER, counters->h3_max_push_id); - stats[H3_ST_GOAWAY] = mkf_u64(FN_COUNTER, counters->h3_goaway); - stats[H3_ST_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_settings); - /* h3 error counters */ - stats[H3_ST_H3_NO_ERROR] = mkf_u64(FN_COUNTER, counters->h3_no_error); - stats[H3_ST_H3_GENERAL_PROTOCOL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error); - stats[H3_ST_H3_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_internal_error); - stats[H3_ST_H3_STREAM_CREATION_ERROR] = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error); - stats[H3_ST_H3_CLOSED_CRITICAL_STREAM] = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream); - stats[H3_ST_H3_FRAME_UNEXPECTED] = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected); - stats[H3_ST_H3_FRAME_ERROR] = mkf_u64(FN_COUNTER, counters->h3_frame_error); - stats[H3_ST_H3_EXCESSIVE_LOAD] = mkf_u64(FN_COUNTER, counters->h3_excessive_load); - stats[H3_ST_H3_ID_ERROR] = mkf_u64(FN_COUNTER, counters->h3_id_error); - stats[H3_ST_H3_SETTINGS_ERROR] = mkf_u64(FN_COUNTER, counters->h3_settings_error); - stats[H3_ST_H3_MISSING_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_missing_settings); - stats[H3_ST_H3_REQUEST_REJECTED] = mkf_u64(FN_COUNTER, counters->h3_request_rejected); - stats[H3_ST_H3_REQUEST_CANCELLED] = mkf_u64(FN_COUNTER, counters->h3_request_cancelled); - stats[H3_ST_H3_REQUEST_INCOMPLETE] = mkf_u64(FN_COUNTER, counters->h3_request_incomplete); - stats[H3_ST_H3_MESSAGE_ERROR] = mkf_u64(FN_COUNTER, counters->h3_message_error); - 
stats[H3_ST_H3_CONNECT_ERROR] = mkf_u64(FN_COUNTER, counters->h3_connect_error); - stats[H3_ST_H3_VERSION_FALLBACK] = mkf_u64(FN_COUNTER, counters->h3_version_fallback); - /* QPACK error counters */ - stats[H3_ST_QPACK_DECOMPRESSION_FAILED] = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed); - stats[H3_ST_QPACK_ENCODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error); - stats[H3_ST_QPACK_DECODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error); + for (; current_field < H3_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + /* h3 frame type counters */ + case H3_ST_DATA: + metric = mkf_u64(FN_COUNTER, counters->h3_data); + break; + case H3_ST_HEADERS: + metric = mkf_u64(FN_COUNTER, counters->h3_headers); + break; + case H3_ST_CANCEL_PUSH: + metric = mkf_u64(FN_COUNTER, counters->h3_cancel_push); + break; + case H3_ST_PUSH_PROMISE: + metric = mkf_u64(FN_COUNTER, counters->h3_push_promise); + break; + case H3_ST_MAX_PUSH_ID: + metric = mkf_u64(FN_COUNTER, counters->h3_max_push_id); + break; + case H3_ST_GOAWAY: + metric = mkf_u64(FN_COUNTER, counters->h3_goaway); + break; + case H3_ST_SETTINGS: + metric = mkf_u64(FN_COUNTER, counters->h3_settings); + break; + + /* h3 error counters */ + case H3_ST_H3_NO_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_no_error); + break; + case H3_ST_H3_GENERAL_PROTOCOL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error); + break; + case H3_ST_H3_INTERNAL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_internal_error); + break; + case H3_ST_H3_STREAM_CREATION_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error); + break; + case H3_ST_H3_CLOSED_CRITICAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream); + break; + case H3_ST_H3_FRAME_UNEXPECTED: + metric = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected); + break; + case H3_ST_H3_FRAME_ERROR: + metric = 
mkf_u64(FN_COUNTER, counters->h3_frame_error); + break; + case H3_ST_H3_EXCESSIVE_LOAD: + metric = mkf_u64(FN_COUNTER, counters->h3_excessive_load); + break; + case H3_ST_H3_ID_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_id_error); + break; + case H3_ST_H3_SETTINGS_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_settings_error); + break; + case H3_ST_H3_MISSING_SETTINGS: + metric = mkf_u64(FN_COUNTER, counters->h3_missing_settings); + break; + case H3_ST_H3_REQUEST_REJECTED: + metric = mkf_u64(FN_COUNTER, counters->h3_request_rejected); + break; + case H3_ST_H3_REQUEST_CANCELLED: + metric = mkf_u64(FN_COUNTER, counters->h3_request_cancelled); + break; + case H3_ST_H3_REQUEST_INCOMPLETE: + metric = mkf_u64(FN_COUNTER, counters->h3_request_incomplete); + break; + case H3_ST_H3_MESSAGE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_message_error); + break; + case H3_ST_H3_CONNECT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_connect_error); + break; + case H3_ST_H3_VERSION_FALLBACK: + metric = mkf_u64(FN_COUNTER, counters->h3_version_fallback); + break; + + /* QPACK error counters */ + case H3_ST_QPACK_DECOMPRESSION_FAILED: + metric = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed); + break; + case H3_ST_QPACK_ENCODER_STREAM_ERROR: + metric = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error); + break; + case H3_ST_QPACK_DECODER_STREAM_ERROR: + metric = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } struct stats_module h3_stats_module = { @@ -180,64 +255,64 @@ INITCALL1(STG_REGISTER, stats_register_module, &h3_stats_module); void h3_inc_err_cnt(struct h3_counters *ctrs, int error_code) { switch (error_code) { - case H3_NO_ERROR: + case H3_ERR_NO_ERROR: HA_ATOMIC_INC(&ctrs->h3_no_error); break; - case H3_GENERAL_PROTOCOL_ERROR: + case H3_ERR_GENERAL_PROTOCOL_ERROR: HA_ATOMIC_INC(&ctrs->h3_general_protocol_error); break; - case H3_INTERNAL_ERROR: + case H3_ERR_INTERNAL_ERROR: HA_ATOMIC_INC(&ctrs->h3_internal_error); break; - case H3_STREAM_CREATION_ERROR: + case H3_ERR_STREAM_CREATION_ERROR: HA_ATOMIC_INC(&ctrs->h3_stream_creation_error); break; - case H3_CLOSED_CRITICAL_STREAM: + case H3_ERR_CLOSED_CRITICAL_STREAM: HA_ATOMIC_INC(&ctrs->h3_closed_critical_stream); break; - case H3_FRAME_UNEXPECTED: + case H3_ERR_FRAME_UNEXPECTED: HA_ATOMIC_INC(&ctrs->h3_frame_unexpected); break; - case H3_FRAME_ERROR: + case H3_ERR_FRAME_ERROR: HA_ATOMIC_INC(&ctrs->h3_frame_error); break; - case H3_EXCESSIVE_LOAD: + case H3_ERR_EXCESSIVE_LOAD: HA_ATOMIC_INC(&ctrs->h3_excessive_load); break; - case H3_ID_ERROR: + case H3_ERR_ID_ERROR: HA_ATOMIC_INC(&ctrs->h3_id_error); break; - case H3_SETTINGS_ERROR: + case H3_ERR_SETTINGS_ERROR: HA_ATOMIC_INC(&ctrs->h3_settings_error); break; - case H3_MISSING_SETTINGS: + case H3_ERR_MISSING_SETTINGS: HA_ATOMIC_INC(&ctrs->h3_missing_settings); break; - case H3_REQUEST_REJECTED: + case H3_ERR_REQUEST_REJECTED: HA_ATOMIC_INC(&ctrs->h3_request_rejected); break; - case H3_REQUEST_CANCELLED: + case H3_ERR_REQUEST_CANCELLED: HA_ATOMIC_INC(&ctrs->h3_request_cancelled); break; - case H3_REQUEST_INCOMPLETE: + case H3_ERR_REQUEST_INCOMPLETE: HA_ATOMIC_INC(&ctrs->h3_request_incomplete); break; - case H3_MESSAGE_ERROR: + case H3_ERR_MESSAGE_ERROR: HA_ATOMIC_INC(&ctrs->h3_message_error); break; - case 
H3_CONNECT_ERROR: + case H3_ERR_CONNECT_ERROR: HA_ATOMIC_INC(&ctrs->h3_connect_error); break; - case H3_VERSION_FALLBACK: + case H3_ERR_VERSION_FALLBACK: HA_ATOMIC_INC(&ctrs->h3_version_fallback); break; - case QPACK_DECOMPRESSION_FAILED: + case QPACK_ERR_DECOMPRESSION_FAILED: HA_ATOMIC_INC(&ctrs->qpack_decompression_failed); break; - case QPACK_ENCODER_STREAM_ERROR: + case QPACK_ERR_ENCODER_STREAM_ERROR: HA_ATOMIC_INC(&ctrs->qpack_encoder_stream_error); break; - case QPACK_DECODER_STREAM_ERROR: + case QPACK_ERR_DECODER_STREAM_ERROR: HA_ATOMIC_INC(&ctrs->qpack_decoder_stream_error); break; default: diff --git a/src/haproxy.c b/src/haproxy.c index 1659d3d..c987fdb 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -60,7 +60,7 @@ #include <assert.h> #endif #if defined(USE_SYSTEMD) -#include <systemd/sd-daemon.h> +#include <haproxy/systemd.h> #endif #include <import/sha1.h> @@ -118,6 +118,7 @@ #include <haproxy/sock.h> #include <haproxy/sock_inet.h> #include <haproxy/ssl_sock.h> +#include <haproxy/stats-file.h> #include <haproxy/stats-t.h> #include <haproxy/stream.h> #include <haproxy/task.h> @@ -209,6 +210,8 @@ struct global global = { .maxsslconn = DEFAULT_MAXSSLCONN, #endif #endif + /* by default allow clients which use a privileged port for TCP only */ + .clt_privileged_ports = HA_PROTO_TCP, /* others NULL OK */ }; @@ -267,6 +270,7 @@ unsigned int warned = 0; unsigned int tainted = 0; unsigned int experimental_directives_allowed = 0; +unsigned int deprecated_directives_allowed = 0; int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum, char **errmsg) @@ -564,9 +568,6 @@ static void display_build_opts() #ifdef BUILD_TARGET "\n TARGET = " BUILD_TARGET #endif -#ifdef BUILD_CPU - "\n CPU = " BUILD_CPU -#endif #ifdef BUILD_CC "\n CC = " BUILD_CC #endif @@ -659,6 +660,7 @@ static void usage(char *name) " -dW fails if any warning is emitted\n" " -dD diagnostic mode : warn about suspicious configuration statements\n" " -dF disable fast-forward\n" 
+ " -dI enable insecure fork\n" " -dZ disable zero-copy forwarding\n" " -sf/-st [pid ]* finishes/terminates old pids.\n" " -x <unix_socket> get listening sockets from a unix socket\n" @@ -721,6 +723,7 @@ static void mworker_reexec(int hardreload) char *msg = NULL; struct rlimit limit; struct mworker_proc *current_child = NULL; + int x_off = 0; /* disable -x by putting -x /dev/null */ mworker_block_signals(); setenv("HAPROXY_MWORKER_REEXEC", "1", 1); @@ -768,6 +771,10 @@ static void mworker_reexec(int hardreload) /* copy the program name */ next_argv[next_argc++] = old_argv[0]; + /* we need to reintroduce /dev/null every time */ + if (old_unixsocket && strcmp(old_unixsocket, "/dev/null") == 0) + x_off = 1; + /* insert the new options just after argv[0] in case we have a -- */ if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) { @@ -791,8 +798,7 @@ static void mworker_reexec(int hardreload) msg = NULL; } } - - if (current_child) { + if (!x_off && current_child) { /* add the -x option with the socketpair of the current worker */ next_argv[next_argc++] = "-x"; if ((next_argv[next_argc++] = memprintf(&msg, "sockpair@%d", current_child->ipc_fd[0])) == NULL) @@ -801,6 +807,12 @@ static void mworker_reexec(int hardreload) } } + if (x_off) { + /* if the cmdline contained a -x /dev/null, continue to use it */ + next_argv[next_argc++] = "-x"; + next_argv[next_argc++] = "/dev/null"; + } + /* copy the previous options */ for (i = 1; i < old_argc; i++) next_argv[next_argc++] = old_argv[i]; @@ -842,8 +854,17 @@ void mworker_reload(int hardreload) } #if defined(USE_SYSTEMD) - if (global.tune.options & GTUNE_USE_SYSTEMD) - sd_notify(0, "RELOADING=1\nSTATUS=Reloading Configuration.\n"); + if (global.tune.options & GTUNE_USE_SYSTEMD) { + struct timespec ts; + + (void)clock_gettime(CLOCK_MONOTONIC, &ts); + + sd_notifyf(0, + "RELOADING=1\n" + "STATUS=Reloading Configuration.\n" + "MONOTONIC_USEC=%" PRIu64 "\n", + (ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000ULL)); + } #endif 
mworker_reexec(hardreload); } @@ -998,19 +1019,19 @@ static void sig_dump_state(struct sig_handler *sh) chunk_printf(&trash, "SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } else if (p->srv_act == 0) { chunk_printf(&trash, "SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, (p->srv_bck) ? "is running on backup servers" : "has no server available", - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } else { chunk_printf(&trash, "SIGHUP: Proxy %s has %d active servers and %d backup servers available." " Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, p->srv_act, p->srv_bck, - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } ha_warning("%s\n", trash.area); send_log(p, LOG_NOTICE, "%s\n", trash.area); @@ -1625,7 +1646,7 @@ static void init_args(int argc, char **argv) global.tune.options |= GTUNE_USE_FAST_FWD; /* Use fast-forward by default */ /* Use zero-copy forwarding by default */ - global.tune.no_zero_copy_fwd = NO_ZERO_COPY_FWD_QUIC_SND; + global.tune.no_zero_copy_fwd = 0; /* keep a copy of original arguments for the master process */ old_argv = copy_argv(argc, argv); @@ -1679,6 +1700,8 @@ static void init_args(int argc, char **argv) #endif else if (*flag == 'd' && flag[1] == 'F') global.tune.options &= ~GTUNE_USE_FAST_FWD; + else if (*flag == 'd' && flag[1] == 'I') + global.tune.options |= GTUNE_INSECURE_FORK; 
else if (*flag == 'd' && flag[1] == 'V') global.ssl_server_verify = SSL_SERVER_VERIFY_NONE; else if (*flag == 'd' && flag[1] == 'Z') @@ -2325,6 +2348,7 @@ static void init(int argc, char **argv) } list_for_each_entry(ppcf, &post_proxy_check_list, list) err_code |= ppcf->fct(px); + px->flags |= PR_FL_CHECKED; } if (err_code & (ERR_ABORT|ERR_FATAL)) { ha_alert("Fatal errors found in configuration.\n"); @@ -2354,6 +2378,9 @@ static void init(int argc, char **argv) /* Apply server states */ apply_server_state(); + /* Preload internal counters. */ + apply_stats_file(); + for (px = proxies_list; px; px = px->next) srv_compute_all_admin_states(px); @@ -2794,9 +2821,6 @@ static void init(int argc, char **argv) #ifdef BUILD_TARGET chunk_appendf(&trash, "TARGET='%s'", BUILD_TARGET); #endif -#ifdef BUILD_CPU - chunk_appendf(&trash, " CPU='%s'", BUILD_CPU); -#endif #ifdef BUILD_OPTIONS chunk_appendf(&trash, " %s", BUILD_OPTIONS); #endif @@ -2935,6 +2959,7 @@ void deinit(void) ha_free(&localpeer); ha_free(&global.server_state_base); ha_free(&global.server_state_file); + ha_free(&global.stats_file); task_destroy(idle_conn_task); idle_conn_task = NULL; @@ -3064,7 +3089,7 @@ void run_poll_loop() if (thread_has_tasks()) { activity[tid].wake_tasks++; _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING); - } else if (signal_queue_len) { + } else if (signal_queue_len && tid == 0) { /* this check is required after setting TH_FL_SLEEPING to avoid * a race with wakeup on signals using wake_threads() */ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING); @@ -3146,6 +3171,18 @@ static void *run_thread_poll_loop(void *data) #endif ha_thread_info[tid].stack_top = __builtin_frame_address(0); + /* Assign the ring queue. Contrary to an intuitive thought, this does + * not benefit from locality and it's counter-productive to group + * threads from a same group or range number in the same queue. 
In some + * sense it arranges us because it means we can use a modulo and ensure + * that even small numbers of threads are well spread. + */ + ha_thread_info[tid].ring_queue = + (tid % MIN(global.nbthread, + (global.tune.ring_queues ? + global.tune.ring_queues : + RING_DFLT_QUEUES))) % RING_WAIT_QUEUES; + /* thread is started, from now on it is not idle nor harmless */ thread_harmless_end(); thread_idle_end(); @@ -3341,9 +3378,6 @@ int main(int argc, char **argv) #ifdef BUILD_TARGET "\n TARGET = " BUILD_TARGET #endif -#ifdef BUILD_CPU - "\n CPU = " BUILD_CPU -#endif #ifdef BUILD_CC "\n CC = " BUILD_CC #endif @@ -3445,18 +3479,6 @@ int main(int argc, char **argv) if (global.rlimit_memmax) { limit.rlim_cur = limit.rlim_max = global.rlimit_memmax * 1048576ULL; -#ifdef RLIMIT_AS - if (setrlimit(RLIMIT_AS, &limit) == -1) { - if (global.tune.options & GTUNE_STRICT_LIMITS) { - ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n", - argv[0], global.rlimit_memmax); - exit(1); - } - else - ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n", - argv[0], global.rlimit_memmax); - } -#else if (setrlimit(RLIMIT_DATA, &limit) == -1) { if (global.tune.options & GTUNE_STRICT_LIMITS) { ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n", @@ -3467,9 +3489,16 @@ int main(int argc, char **argv) ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n", argv[0], global.rlimit_memmax); } -#endif } +#if defined(USE_LINUX_CAP) + /* If CAP_NET_BIND_SERVICE is in binary file permitted set and process + * is started and run under the same non-root user, this allows + * binding to privileged ports. 
+ */ + prepare_caps_from_permitted_set(geteuid(), global.uid, argv[0]); +#endif + /* Try to get the listeners FD from the previous process using * _getsocks on the stat socket, it must never been done in wait mode * and check mode @@ -3599,13 +3628,13 @@ int main(int argc, char **argv) if ((global.mode & (MODE_MWORKER | MODE_DAEMON)) == 0) set_identity(argv[0]); - /* set_identity() above might have dropped LSTCHK_NETADM if - * it changed to a new UID while preserving enough permissions - * to honnor LSTCHK_NETADM. + /* set_identity() above might have dropped LSTCHK_NETADM or/and + * LSTCHK_SYSADM if it changed to a new UID while preserving enough + * permissions to honnor LSTCHK_NETADM/LSTCHK_SYSADM. */ - if ((global.last_checks & LSTCHK_NETADM) && getuid()) { + if ((global.last_checks & (LSTCHK_NETADM|LSTCHK_SYSADM)) && getuid()) { /* If global.uid is present in config, it is already set as euid - * and ruid by set_identity() call just above, so it's better to + * and ruid by set_identity() just above, so it's better to * remind the user to fix uncoherent settings. */ if (global.uid) { @@ -516,7 +516,15 @@ static inline int hlua_timer_check(const struct hlua_timer *timer) /* Interrupts the Lua processing each "hlua_nb_instruction" instructions. * it is used for preventing infinite loops. + */ +static unsigned int hlua_nb_instruction = 0; + +/* Wrapper to retrieve the number of instructions between two interrupts + * depending on user settings and current hlua context. If not already + * explicitly set, we compute the ideal value using hard limits releaved + * by Thierry Fournier's work, whose original notes may be found below: * + * -- * I test the scheer with an infinite loop containing one incrementation * and one test. 
I run this loop between 10 seconds, I raise a ceil of * 710M loops from one interrupt each 9000 instructions, so I fix the value @@ -537,9 +545,42 @@ static inline int hlua_timer_check(const struct hlua_timer *timer) * 10000 | 710 * 100000 | 710 * 1000000 | 710 + * -- * + * Thanks to his work, we know we can safely use values between 500 and 10000 + * without a significant impact on performance. */ -static unsigned int hlua_nb_instruction = 10000; +static inline unsigned int hlua_get_nb_instruction(struct hlua *hlua) +{ + int ceil = 10000; /* above 10k, no significant performance gain */ + int floor = 500; /* below 500, significant performance loss */ + + if (hlua_nb_instruction) { + /* value enforced by user */ + return hlua_nb_instruction; + } + + /* not set, assign automatic value */ + if (hlua->state_id == 0) { + /* this function is expected to be called during runtime (after config + * parsing), thus global.nb_thread is expected to be set. + */ + BUG_ON(global.nbthread == 0); + + /* main lua stack (shared global lock), take number of threads into + * account in an attempt to reduce thread contention + */ + return MAX(floor, ceil / global.nbthread); + } + else { + /* per-thread lua stack, less contention is expected (no global lock), + * allow up to the maximum number of instructions and hope that the + * user manually yields after heavy (lock dependent) work from lua + * script (e.g.: map manipulation). + */ + return ceil; + } +} /* Descriptor for the memory allocation state. The limit is pre-initialised to * 0 until it is replaced by "tune.lua.maxmem" during the config parsing, or it @@ -1783,7 +1824,7 @@ void hlua_hook(lua_State *L, lua_Debug *ar) /* Try to interrupt the process at the end of the current * unyieldable function. 
*/ - lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_nb_instruction); + lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_get_nb_instruction(hlua)); } /* This function start or resumes the Lua stack execution. If the flag @@ -1823,10 +1864,10 @@ static enum hlua_exec hlua_ctx_resume(struct hlua *lua, int yield_allowed) resume_execution: - /* This hook interrupts the Lua processing each 'hlua_nb_instruction' + /* This hook interrupts the Lua processing each 'hlua_get_nb_instruction() * instructions. it is used for preventing infinite loops. */ - lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_nb_instruction); + lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_get_nb_instruction(lua)); /* Remove all flags except the running flags. */ HLUA_SET_RUN(lua); @@ -2113,7 +2154,7 @@ static int hlua_set_map(lua_State *L) /* This function is an LUA binding. It provides a function * for retrieving a var from the proc scope in core. */ - static int hlua_core_get_var(lua_State *L) +__LJMP static int hlua_core_get_var(lua_State *L) { const char *name; size_t len; @@ -2135,7 +2176,6 @@ static int hlua_set_map(lua_State *L) } return MAY_LJMP(hlua_smp2lua(L, &smp)); - return 1; } /* This function disables the sending of email through the @@ -2415,7 +2455,7 @@ static void hlua_socket_handler(struct appctx *appctx) struct hlua_csk_ctx *ctx = appctx->svcctx; struct stconn *sc = appctx_sc(appctx); - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); notification_wake(&ctx->wake_on_read); notification_wake(&ctx->wake_on_write); @@ -3574,7 +3614,7 @@ __LJMP static struct channel *hlua_checkchannel(lua_State *L, int ud) /* Pushes the channel onto the top of the stack. 
If the stask does not have a * free slots, the function fails and returns 0; */ -static int hlua_channel_new(lua_State *L, struct channel *channel) +__LJMP static int hlua_channel_new(lua_State *L, struct channel *channel) { /* Check stack size. */ if (!lua_checkstack(L, 3)) @@ -4592,7 +4632,7 @@ __LJMP static struct hlua_smp *hlua_checkfetches(lua_State *L, int ud) /* This function creates and push in the stack a fetch object according * with a current TXN. */ -static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) +__LJMP static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) { struct hlua_smp *hsmp; @@ -4714,7 +4754,7 @@ __LJMP static struct hlua_smp *hlua_checkconverters(lua_State *L, int ud) /* This function creates and push in the stack a Converters object * according with a current TXN. */ -static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) +__LJMP static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) { struct hlua_smp *hsmp; @@ -6095,7 +6135,7 @@ __LJMP static struct hlua_txn *hlua_checkhttp(lua_State *L, int ud) /* This function creates and push in the stack a HTTP object * according with a current TXN. */ -static int hlua_http_new(lua_State *L, struct hlua_txn *txn) +__LJMP static int hlua_http_new(lua_State *L, struct hlua_txn *txn) { struct hlua_txn *htxn; @@ -8131,7 +8171,7 @@ __LJMP static int hlua_get_priv(lua_State *L) * return 0 if the stack does not contains free slots, * otherwise it returns 1. 
*/ -static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags) +__LJMP static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags) { struct hlua_txn *htxn; @@ -8311,30 +8351,25 @@ __LJMP static int hlua_txn_log_alert(lua_State *L) return 0; } -__LJMP static int hlua_txn_set_loglevel(lua_State *L) +__LJMP static int hlua_txn_set_fc_mark(lua_State *L) { struct hlua_txn *htxn; - int ll; + int mark; - MAY_LJMP(check_args(L, 2, "set_loglevel")); + MAY_LJMP(check_args(L, 2, "set_fc_mark")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); - ll = MAY_LJMP(luaL_checkinteger(L, 2)); - - if (ll < -1 || ll > NB_LOG_LEVELS) - WILL_LJMP(luaL_argerror(L, 2, "Bad log level. It must be one of the following value:" - " core.silent(-1), core.emerg(0), core.alert(1), core.crit(2), core.error(3)," - " core.warning(4), core.notice(5), core.info(6) or core.debug(7)")); + mark = MAY_LJMP(luaL_checkinteger(L, 2)); - htxn->s->logs.level = (ll == -1) ? ll : ll + 1; + conn_set_mark(objt_conn(htxn->s->sess->origin), mark); return 0; } -__LJMP static int hlua_txn_set_tos(lua_State *L) +__LJMP static int hlua_txn_set_fc_tos(lua_State *L) { struct hlua_txn *htxn; int tos; - MAY_LJMP(check_args(L, 2, "set_tos")); + MAY_LJMP(check_args(L, 2, "set_fc_tos")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); tos = MAY_LJMP(luaL_checkinteger(L, 2)); @@ -8342,16 +8377,21 @@ __LJMP static int hlua_txn_set_tos(lua_State *L) return 0; } -__LJMP static int hlua_txn_set_mark(lua_State *L) +__LJMP static int hlua_txn_set_loglevel(lua_State *L) { struct hlua_txn *htxn; - int mark; + int ll; - MAY_LJMP(check_args(L, 2, "set_mark")); + MAY_LJMP(check_args(L, 2, "set_loglevel")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); - mark = MAY_LJMP(luaL_checkinteger(L, 2)); + ll = MAY_LJMP(luaL_checkinteger(L, 2)); - conn_set_mark(objt_conn(htxn->s->sess->origin), mark); + if (ll < -1 || ll > NB_LOG_LEVELS) + WILL_LJMP(luaL_argerror(L, 2, "Bad log level. 
It must be one of the following value:" + " core.silent(-1), core.emerg(0), core.alert(1), core.crit(2), core.error(3)," + " core.warning(4), core.notice(5), core.info(6) or core.debug(7)")); + + htxn->s->logs.level = (ll == -1) ? ll : ll + 1; return 0; } @@ -8617,7 +8657,7 @@ __LJMP static int hlua_txn_done(lua_State *L) /* Pushes the TXN reply onto the top of the stack. If the stask does not have a * free slots, the function fails and returns 0; */ -static int hlua_txn_reply_new(lua_State *L) +__LJMP static int hlua_txn_reply_new(lua_State *L) { struct hlua_txn *htxn; const char *reason, *body = NULL; @@ -9575,7 +9615,7 @@ __LJMP static void hlua_event_hdl_cb_push_args(struct hlua_event_sub *hlua_sub, */ px = proxy_find_by_id(e_server->safe.proxy_uuid, PR_CAP_BE, 0); BUG_ON(!px); - server = findserver_unique_id(px, e_server->safe.puid, e_server->safe.rid); + server = server_find_by_id_unique(px, e_server->safe.puid, e_server->safe.rid); if (server) { lua_pushstring(hlua->T, "reference"); hlua_fcn_new_server(hlua->T, server); @@ -9658,14 +9698,14 @@ static struct task *hlua_event_runner(struct task *task, void *context, unsigned error = hlua_tostring_safe(hlua_sub->hlua->T, -1); else error = "critical error"; - ha_alert("Lua event_hdl: %s.\n", error); + SEND_ERR(NULL, "Lua event_hdl: %s.\n", error); hlua_unlock(hlua_sub->hlua); goto skip_event; } /* Check stack available size. 
*/ if (!lua_checkstack(hlua_sub->hlua->T, 5)) { - ha_alert("Lua event_hdl: full stack.\n"); + SEND_ERR(NULL, "Lua event_hdl: full stack.\n"); RESET_SAFE_LJMP(hlua_sub->hlua); goto skip_event; } @@ -13939,9 +13979,11 @@ lua_State *hlua_init_state(int thread_num) hlua_class_function(L, "get_var", hlua_get_var); hlua_class_function(L, "done", hlua_txn_done); hlua_class_function(L, "reply", hlua_txn_reply_new); + hlua_class_function(L, "set_fc_mark", hlua_txn_set_fc_mark); + hlua_class_function(L, "set_fc_tos", hlua_txn_set_fc_tos); hlua_class_function(L, "set_loglevel", hlua_txn_set_loglevel); - hlua_class_function(L, "set_tos", hlua_txn_set_tos); - hlua_class_function(L, "set_mark", hlua_txn_set_mark); + hlua_class_function(L, "set_mark", hlua_txn_set_fc_mark); // DEPRECATED, use set_fc_mark + hlua_class_function(L, "set_tos", hlua_txn_set_fc_tos); // DEPRECATED, use set_fc_tos hlua_class_function(L, "set_priority_class", hlua_txn_set_priority_class); hlua_class_function(L, "set_priority_offset", hlua_txn_set_priority_offset); hlua_class_function(L, "deflog", hlua_txn_deflog); diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index d8dcdfd..0340ce1 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -53,7 +53,7 @@ static int class_stktable_ref; static int class_proxy_list_ref; static int class_server_list_ref; -#define STATS_LEN (MAX((int)ST_F_TOTAL_FIELDS, (int)INF_TOTAL_FIELDS)) +#define STATS_LEN (MAX((int)ST_I_PX_MAX, (int)ST_I_INF_MAX)) static THREAD_LOCAL struct field stats[STATS_LEN]; @@ -377,8 +377,8 @@ static int hlua_get_info(lua_State *L) stats_fill_info(stats, STATS_LEN, 0); lua_newtable(L); - for (i=0; i<INF_TOTAL_FIELDS; i++) { - lua_pushstring(L, info_fields[i].name); + for (i=0; i<ST_I_INF_MAX; i++) { + lua_pushstring(L, stat_cols_info[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -982,6 +982,7 @@ int hlua_stktable_dump(lua_State *L) int i; int skip_entry; void *ptr; + int shard = 0; // FIXME: this should be stored in the context 
and iterate to scan the table t = hlua_check_stktable(L, 1); type = lua_type(L, 2); @@ -1042,16 +1043,17 @@ int hlua_stktable_dump(lua_State *L) lua_newtable(L); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - eb = ebmb_first(&t->keys); + next_shard: + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + eb = ebmb_first(&t->shards[shard].keys); for (n = eb; n; n = ebmb_next(n)) { ts = ebmb_entry(n, struct stksess, key); if (!ts) { - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); - return 1; + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + goto done; } HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); /* multi condition/value filter */ skip_entry = 0; @@ -1090,7 +1092,7 @@ int hlua_stktable_dump(lua_State *L) } if (skip_entry) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ts->ref_cnt); continue; } @@ -1114,10 +1116,14 @@ int hlua_stktable_dump(lua_State *L) lua_newtable(L); hlua_stktable_entry(L, t, ts); lua_settable(L, -3); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ts->ref_cnt); } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + done: + shard++; + if (shard < CONFIG_HAP_TBL_BUCKETS) + goto next_shard; return 1; } @@ -1152,12 +1158,12 @@ int hlua_listener_get_stats(lua_State *L) return 1; } - stats_fill_li_stats(li->bind_conf->frontend, li, STAT_SHLGNDS, stats, - STATS_LEN, NULL); + stats_fill_li_line(li->bind_conf->frontend, li, STAT_F_SHLGNDS, stats, + STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -1198,12 +1204,12 @@ 
int hlua_server_get_stats(lua_State *L) return 1; } - stats_fill_sv_stats(srv->proxy, srv, STAT_SHLGNDS, stats, - STATS_LEN, NULL); + stats_fill_sv_line(srv->proxy, srv, STAT_F_SHLGNDS, stats, + STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -1329,14 +1335,14 @@ static int hlua_server_index(struct lua_State *L) { const char *key = lua_tostring(L, 2); - if (!strcmp(key, "name")) { + if (strcmp(key, "name") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of server 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); hlua_server_get_name(L); return 1; } - if (!strcmp(key, "puid")) { + if (strcmp(key, "puid") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of server 'puid' attribute is deprecated and will eventually be removed, please use get_puid() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); @@ -1513,7 +1519,7 @@ int hlua_server_set_addr(lua_State *L) port = NULL; HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); - err = srv_update_addr_port(srv, addr, port, "Lua script"); + err = srv_update_addr_port(srv, addr, port, SERVER_INETADDR_UPDATER_LUA); HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); if (!err) lua_pushnil(L); @@ -1980,14 +1986,14 @@ static int hlua_proxy_index(struct lua_State *L) { const char *key = lua_tostring(L, 2); - if (!strcmp(key, "name")) { + if (strcmp(key, "name") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of proxy 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); hlua_proxy_get_name(L); return 1; } - if (!strcmp(key, "uuid")) { + if (strcmp(key, "uuid") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of proxy 'uuid' 
attribute is deprecated and will eventually be removed, please use get_uuid() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); @@ -2046,12 +2052,12 @@ int hlua_proxy_get_stats(lua_State *L) px = hlua_check_proxy(L, 1); if (px->cap & PR_CAP_BE) - stats_fill_be_stats(px, STAT_SHLGNDS, stats, STATS_LEN, NULL); + stats_fill_be_line(px, STAT_F_SHLGNDS, stats, STATS_LEN, NULL); else - stats_fill_fe_stats(px, stats, STATS_LEN, NULL); + stats_fill_fe_line(px, 0, stats, STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } diff --git a/src/hq_interop.c b/src/hq_interop.c index 31c2101..c88f888 100644 --- a/src/hq_interop.c +++ b/src/hq_interop.c @@ -8,8 +8,10 @@ #include <haproxy/http.h> #include <haproxy/mux_quic.h> #include <haproxy/qmux_http.h> +#include <haproxy/qmux_trace.h> +#include <haproxy/trace.h> -static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) +static ssize_t hq_interop_rcv_buf(struct qcs *qcs, struct buffer *b, int fin) { struct htx *htx; struct htx_sl *sl; @@ -25,7 +27,7 @@ static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) if (!fin) return 0; - b_alloc(&htx_buf); + b_alloc(&htx_buf, DB_MUX_RX); htx = htx_from_buf(&htx_buf); /* skip method */ @@ -83,34 +85,21 @@ static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) return b_data(b); } -static struct buffer *mux_get_buf(struct qcs *qcs) -{ - if (!b_size(&qcs->tx.buf)) - b_alloc(&qcs->tx.buf); - - return &qcs->tx.buf; -} - static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) { enum htx_blk_type btype; - struct htx *htx; + struct htx *htx = NULL; struct htx_blk *blk; int32_t idx; uint32_t bsize, fsize; - struct buffer *res, outbuf; + struct buffer *res = NULL; size_t total = 0; 
- - res = mux_get_buf(qcs); - outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0); + int err; htx = htx_from_buf(buf); - if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) - qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; - - while (count && !htx_is_empty(htx) && !(qcs->flags & QC_SF_BLK_MROOM)) { + while (count && !htx_is_empty(htx) && qcc_stream_can_send(qcs)) { /* Not implemented : QUIC on backend side */ idx = htx_get_head(htx); blk = htx_get_blk(htx, idx); @@ -121,18 +110,48 @@ static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, switch (btype) { case HTX_BLK_DATA: + res = qcc_get_stream_txbuf(qcs, &err); + if (!res) { + if (err) + ABORT_NOW(); + goto end; + } + + if (unlikely(fsize == count && + !b_data(res) && + htx_nbblks(htx) == 1 && btype == HTX_BLK_DATA)) { + void *old_area = res->area; + + TRACE_DATA("perform zero-copy DATA transfer", QMUX_EV_STRM_SEND, + qcs->qcc->conn, qcs); + + /* remap MUX buffer to HTX area */ + *res = b_make(buf->area, buf->size, + sizeof(struct htx) + blk->addr, fsize); + + /* assign old MUX area to HTX buffer. */ + buf->area = old_area; + buf->data = buf->head = 0; + total += fsize; + + /* reload HTX with empty buffer. */ + *htx = *htx_from_buf(buf); + goto end; + } + if (fsize > count) fsize = count; - if (b_room(&outbuf) < fsize) - fsize = b_room(&outbuf); + if (b_contig_space(res) < fsize) + fsize = b_contig_space(res); if (!fsize) { - qcs->flags |= QC_SF_BLK_MROOM; - goto end; + /* Release buf and restart parsing if sending still possible. 
*/ + qcc_release_stream_txbuf(qcs); + continue; } - b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize); + b_putblk(res, htx_get_blk_ptr(htx, blk), fsize); total += fsize; count -= fsize; @@ -155,12 +174,56 @@ static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, } end: - b_add(res, b_data(&outbuf)); htx_to_buf(htx, buf); return total; } +static size_t hq_interop_nego_ff(struct qcs *qcs, size_t count) +{ + int err, ret = 0; + struct buffer *res; + + start: + res = qcc_get_stream_txbuf(qcs, &err); + if (!res) { + if (err) + ABORT_NOW(); + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + if (!b_room(res)) { + if (qcc_release_stream_txbuf(qcs)) { + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + goto start; + } + + /* No header required for HTTP/0.9, no need to reserve an offset. */ + qcs->sd->iobuf.buf = res; + qcs->sd->iobuf.offset = 0; + qcs->sd->iobuf.data = 0; + + ret = MIN(count, b_contig_space(res)); + end: + return ret; +} + +static size_t hq_interop_done_ff(struct qcs *qcs) +{ + const size_t ret = qcs->sd->iobuf.data; + + /* No header required for HTTP/0.9, simply mark ff as done. 
*/ + qcs->sd->iobuf.buf = NULL; + qcs->sd->iobuf.offset = 0; + qcs->sd->iobuf.data = 0; + + return ret; +} + static int hq_interop_attach(struct qcs *qcs, void *conn_ctx) { qcs_wait_http_req(qcs); @@ -168,7 +231,9 @@ static int hq_interop_attach(struct qcs *qcs, void *conn_ctx) } const struct qcc_app_ops hq_interop_ops = { - .decode_qcs = hq_interop_decode_qcs, + .rcv_buf = hq_interop_rcv_buf, .snd_buf = hq_interop_snd_buf, + .nego_ff = hq_interop_nego_ff, + .done_ff = hq_interop_done_ff, .attach = hq_interop_attach, }; @@ -12,6 +12,7 @@ #include <ctype.h> #include <haproxy/api.h> +#include <haproxy/cfgparse.h> #include <haproxy/http.h> #include <haproxy/tools.h> @@ -344,6 +345,14 @@ const struct ist http_known_methods[HTTP_METH_OTHER] = { [HTTP_METH_CONNECT] = IST("CONNECT"), }; +/* 500 bits to indicate for each status code from 100 to 599 if it participates + * to the error or failure class. The last 12 bits are not assigned for now. + * Not initialized, has to be done at boot. This is manipulated using + * http_status_{add,del}_range(). + */ +long http_err_status_codes[512 / sizeof(long)] = { }; +long http_fail_status_codes[512 / sizeof(long)] = { }; + /* * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown * ones. 
@@ -352,15 +361,15 @@ enum http_meth_t find_http_meth(const char *str, const int len) { const struct ist m = ist2(str, len); - if (isteq(m, ist("GET"))) return HTTP_METH_GET; - else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD; - else if (isteq(m, ist("POST"))) return HTTP_METH_POST; - else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT; - else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT; - else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS; - else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE; - else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE; - else return HTTP_METH_OTHER; + if (isteq(m, http_known_methods[HTTP_METH_GET])) return HTTP_METH_GET; + else if (isteq(m, http_known_methods[HTTP_METH_PUT])) return HTTP_METH_PUT; + else if (isteq(m, http_known_methods[HTTP_METH_HEAD])) return HTTP_METH_HEAD; + else if (isteq(m, http_known_methods[HTTP_METH_POST])) return HTTP_METH_POST; + else if (isteq(m, http_known_methods[HTTP_METH_TRACE])) return HTTP_METH_TRACE; + else if (isteq(m, http_known_methods[HTTP_METH_DELETE])) return HTTP_METH_DELETE; + else if (isteq(m, http_known_methods[HTTP_METH_CONNECT])) return HTTP_METH_CONNECT; + else if (isteq(m, http_known_methods[HTTP_METH_OPTIONS])) return HTTP_METH_OPTIONS; + else return HTTP_METH_OTHER; } /* This function returns HTTP_ERR_<num> (enum) matching http status code. 
@@ -368,28 +377,27 @@ enum http_meth_t find_http_meth(const char *str, const int len) */ int http_get_status_idx(unsigned int status) { - switch (status) { - case 200: return HTTP_ERR_200; - case 400: return HTTP_ERR_400; - case 401: return HTTP_ERR_401; - case 403: return HTTP_ERR_403; - case 404: return HTTP_ERR_404; - case 405: return HTTP_ERR_405; - case 407: return HTTP_ERR_407; - case 408: return HTTP_ERR_408; - case 410: return HTTP_ERR_410; - case 413: return HTTP_ERR_413; - case 421: return HTTP_ERR_421; - case 422: return HTTP_ERR_422; - case 425: return HTTP_ERR_425; - case 429: return HTTP_ERR_429; - case 500: return HTTP_ERR_500; - case 501: return HTTP_ERR_501; - case 502: return HTTP_ERR_502; - case 503: return HTTP_ERR_503; - case 504: return HTTP_ERR_504; - default: return HTTP_ERR_500; - } + /* This table was built using dev/phash and easily finds solutions up + * to 21 different entries and produces much better code with 32 + * (padded with err 500 below as it's the default, though only [19] is + * the real one). + */ + const uchar codes[32] = { + HTTP_ERR_408, HTTP_ERR_200, HTTP_ERR_504, HTTP_ERR_400, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_401, HTTP_ERR_410, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, + HTTP_ERR_500, HTTP_ERR_429, HTTP_ERR_403, HTTP_ERR_500, + HTTP_ERR_421, HTTP_ERR_404, HTTP_ERR_413, HTTP_ERR_500, + HTTP_ERR_422, HTTP_ERR_405, HTTP_ERR_500, HTTP_ERR_501, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_502, + HTTP_ERR_407, HTTP_ERR_500, HTTP_ERR_503, HTTP_ERR_425, + }; + uint hash = ((status * 118) >> 5) % 32; + uint ret = codes[hash]; + + if (http_err_codes[ret] == status) + return ret; + return HTTP_ERR_500; } /* This function returns a reason associated with the HTTP status. @@ -478,6 +486,40 @@ const char *http_get_reason(unsigned int status) } } +/* add status codes from low to high included to status codes array <array> + * which must be compatible with http_err_codes and http_fail_codes (i.e. 
512 + * bits each). This is not thread save and is meant for being called during + * boot only. Only status codes 100-599 are permitted. + */ +void http_status_add_range(long *array, uint low, uint high) +{ + low -= 100; + high -= 100; + + BUG_ON(low > 499); + BUG_ON(high > 499); + + while (low <= high) + ha_bit_set(low++, array); +} + +/* remove status codes from low to high included to status codes array <array> + * which must be compatible with http_err_codes and http_fail_codes (i.e. 512 + * bits each). This is not thread save and is meant for being called during + * boot only. Only status codes 100-599 are permitted. + */ +void http_status_del_range(long *array, uint low, uint high) +{ + low -= 100; + high -= 100; + + BUG_ON(low > 499); + BUG_ON(high > 499); + + while (low <= high) + ha_bit_clr(low++, array); +} + /* Returns the ist string corresponding to port part (without ':') in the host * <host>, IST_NULL if no ':' is found or an empty IST if there is no digit. In * the last case, the result is the original ist trimmed to 0. So be sure to test @@ -1431,3 +1473,111 @@ struct ist http_trim_trailing_spht(struct ist value) return ret; } + +/* initialize the required structures and arrays */ +static void _http_init() +{ + /* preset the default status codes that count as errors and failures */ + http_status_add_range(http_err_status_codes, 400, 499); + http_status_add_range(http_fail_status_codes, 500, 599); + http_status_del_range(http_fail_status_codes, 501, 501); + http_status_del_range(http_fail_status_codes, 505, 505); +} +INITCALL0(STG_INIT, _http_init); + +/* + * registered keywords below + */ + +/* parses a global "http-err-codes" and "http-fail-codes" directive. 
*/ +static int http_parse_http_err_fail_codes(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *cmd = args[0]; + const char *p, *b, *e; + int op, low, high; + long *bitfield; + int ret = -1; + + if (strcmp(cmd, "http-err-codes") == 0) + bitfield = http_err_status_codes; + else if (strcmp(cmd, "http-fail-codes") == 0) + bitfield = http_fail_status_codes; + else + ABORT_NOW(); + + if (!*args[1]) { + memprintf(err, "Missing status codes range for '%s'.", cmd); + goto end; + } + + /* operation: <0 = remove, 0 = replace, >0 = add. The operation is only + * reset for each new arg so that we can do +200,300,400 without + * changing the operation. + */ + for (; *(p = *(++args)); ) { + switch (*p) { + case '+': op = 1; p++; break; + case '-': op = -1; p++; break; + default: op = 0; break; + } + + if (!*p) + goto inval; + + while (1) { + b = p; + e = p + strlen(p); + low = read_uint(&p, e); + if (b == e || p == b) + goto inval; + + high = low; + if (*p == '-') { + p++; + b = p; + high = read_uint(&p, e); + if (b == e || p == b || (*p && *p != ',')) + goto inval; + } + else if (*p && *p != ',') + goto inval; + + if (high < low || low < 100 || high > 599) { + memprintf(err, "Invalid status codes range '%s' in '%s'.\n" + " Codes must be between 100 and 599 and ranges in ascending order.", + *args, cmd); + goto end; + } + + if (!op) + memset(bitfield, 0, sizeof(http_err_status_codes)); + if (op >= 0) + http_status_add_range(bitfield, low, high); + if (op < 0) + http_status_del_range(bitfield, low, high); + + if (!*p) + break; + /* skip ',' */ + p++; + } + } + ret = 0; + end: + return ret; + inval: + memprintf(err, "Invalid status codes range '%s' in '%s' at position %lu. 
Ranges must be in the form [+-]{low[-{high}]}[,...].", + *args, cmd, (ulong)(p - *args)); + goto end; + +} + +static struct cfg_kw_list cfg_kws = {{ },{ + { CFG_GLOBAL, "http-err-codes", http_parse_http_err_fail_codes }, + { CFG_GLOBAL, "http-fail-codes", http_parse_http_err_fail_codes }, + { /* END */ } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/http_act.c b/src/http_act.c index 7d45780..3a902ab 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -46,17 +46,10 @@ */ static void release_http_action(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - istfree(&rule->arg.http.str); if (rule->arg.http.re) regex_free(rule->arg.http.re); - list_for_each_entry_safe(lf, lfb, &rule->arg.http.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rule->arg.http.fmt); } /* Release memory allocated by HTTP actions relying on an http reply. Concretly, @@ -179,7 +172,7 @@ static enum act_parse_ret parse_set_req_line(const char **args, int *orig_arg, s } rule->action_ptr = http_action_set_req_line; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); if (!*args[cur_arg] || (*args[cur_arg + 1] && strcmp(args[cur_arg + 1], "if") != 0 && strcmp(args[cur_arg + 1], "unless") != 0)) { @@ -616,7 +609,7 @@ static enum act_parse_ret parse_replace_uri(const char **args, int *orig_arg, st rule->action_ptr = http_action_replace_uri; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); if (!*args[cur_arg] || !*args[cur_arg+1] || (*args[cur_arg+2] && strcmp(args[cur_arg+2], "if") != 0 && strcmp(args[cur_arg+2], "unless") != 0)) { @@ -680,7 +673,7 @@ static enum act_parse_ret parse_http_set_status(const char **args, int *orig_arg rule->action = ACT_CUSTOM; rule->action_ptr = action_http_set_status; rule->release_ptr = release_http_action; - 
LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); /* Check if an argument is available */ if (!*args[*orig_arg]) { @@ -1317,7 +1310,7 @@ static enum act_parse_ret parse_http_auth(const char **args, int *orig_arg, stru rule->flags |= ACT_FLAG_FINAL; rule->action_ptr = http_action_auth; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (strcmp(args[cur_arg], "realm") == 0) { @@ -1497,7 +1490,7 @@ static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg rule->action_ptr = http_action_set_header; } rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg] || !*args[cur_arg+1]) { @@ -1529,10 +1522,6 @@ static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg return ACT_RET_PRS_ERR; } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - /* some characters are totally forbidden in header names and * may happen by accident when writing configs, causing strange * failures in field. 
Better catch these ones early, nobody will @@ -1623,7 +1612,7 @@ static enum act_parse_ret parse_http_replace_header(const char **args, int *orig rule->action = 1; // replace-value rule->action_ptr = http_action_replace_header; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg] || !*args[cur_arg+1] || !*args[cur_arg+2]) { @@ -1661,10 +1650,6 @@ static enum act_parse_ret parse_http_replace_header(const char **args, int *orig return ACT_RET_PRS_ERR; } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - *orig_arg = cur_arg + 1; return ACT_RET_PRS_OK; } @@ -1726,7 +1711,7 @@ static enum act_parse_ret parse_http_del_header(const char **args, int *orig_arg rule->action = PAT_MATCH_STR; rule->action_ptr = http_action_del_header; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg]) { @@ -1901,23 +1886,10 @@ static enum act_return http_action_set_map(struct act_rule *rule, struct proxy * /* Release memory allocated by an http map/acl action. */ static void release_http_map(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - free(rule->arg.map.ref); - list_for_each_entry_safe(lf, lfb, &rule->arg.map.key, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - if (rule->action == 1) { - list_for_each_entry_safe(lf, lfb, &rule->arg.map.value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + lf_expr_deinit(&rule->arg.map.key); + if (rule->action == 1) + lf_expr_deinit(&rule->arg.map.value); } /* Parse a "add-acl", "del-acl", "set-map" or "del-map" actions. 
It takes one or @@ -1979,7 +1951,7 @@ static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, s } /* key pattern */ - LIST_INIT(&rule->arg.map.key); + lf_expr_init(&rule->arg.map.key); if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.key, LOG_OPT_HTTP, cap, err)) { free(rule->arg.map.ref); return ACT_RET_PRS_ERR; @@ -1988,17 +1960,13 @@ static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, s if (rule->action == 1) { /* value pattern for set-map only */ cur_arg++; - LIST_INIT(&rule->arg.map.value); + lf_expr_init(&rule->arg.map.value); if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.value, LOG_OPT_HTTP, cap, err)) { free(rule->arg.map.ref); return ACT_RET_PRS_ERR; } } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - *orig_arg = cur_arg + 1; return ACT_RET_PRS_OK; } @@ -2044,13 +2012,14 @@ static enum act_return http_action_track_sc(struct act_rule *rule, struct proxy * but here we're tracking after this ought to have been done so we have * to do it on purpose. 
*/ - if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 400) < 100) { + if (rule->from == ACT_F_HTTP_RES && + http_status_matches(http_err_status_codes, s->txn->status)) { ptr3 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT); ptr4 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE); } - if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 500) < 100 && - s->txn->status != 501 && s->txn->status != 505) { + if (rule->from == ACT_F_HTTP_RES && + http_status_matches(http_fail_status_codes, s->txn->status)) { ptr5 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT); ptr6 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE); } diff --git a/src/http_ana.c b/src/http_ana.c index 178f874..5196341 100644 --- a/src/http_ana.c +++ b/src/http_ana.c @@ -35,6 +35,7 @@ #include <haproxy/sc_strm.h> #include <haproxy/server-t.h> #include <haproxy/stats.h> +#include <haproxy/stats-html.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/trace.h> @@ -328,7 +329,8 @@ int http_wait_for_request(struct stream *s, struct channel *req, int an_bit) return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (sess->listener && sess->listener->counters) _HA_ATOMIC_INC(&sess->listener->counters->internal_errors); @@ -584,7 +586,8 @@ int http_process_req_common(struct stream *s, struct channel *req, int an_bit, s return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -657,7 +660,7 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) * A unique ID is generated even when it is not sent to ensure that the ID can make use of * fetches only available in the HTTP request 
processing stage. */ - if (!LIST_ISEMPTY(&sess->fe->format_unique_id)) { + if (!lf_expr_isempty(&sess->fe->format_unique_id)) { struct ist unique_id = stream_generate_unique_id(s, &sess->fe->format_unique_id); if (!isttest(unique_id)) { @@ -734,7 +737,8 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -836,7 +840,8 @@ int http_wait_for_request_body(struct stream *s, struct channel *req, int an_bit return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -1084,7 +1089,8 @@ int http_request_forward_body(struct stream *s, struct channel *req, int an_bit) goto return_prx_cond; return_int_err: - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); if (sess->listener && sess->listener->counters) @@ -1241,7 +1247,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) return 0; } - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1335,7 +1341,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) } } - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1360,7 +1366,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) /* 5: write error to client (we don't send 
any message then) */ else if (sc_ep_test(s->scf, SE_FL_ERR_PENDING)) { - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1444,22 +1450,22 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (sl->flags & HTX_SL_F_CONN_UPG) msg->flags |= HTTP_MSGF_CONN_UPG; - n = txn->status / 100; - if (n < 1 || n > 5) - n = 0; - /* when the client triggers a 4xx from the server, it's most often due * to a missing object or permission. These events should be tracked * because if they happen often, it may indicate a brute force or a * vulnerability scan. */ - if (n == 4) + if (http_status_matches(http_err_status_codes, txn->status)) stream_inc_http_err_ctr(s); - if (n == 5 && txn->status != 501 && txn->status != 505) + if (http_status_matches(http_fail_status_codes, txn->status)) stream_inc_http_fail_ctr(s); if (objt_server(s->target)) { + n = txn->status / 100; + if (n < 1 || n > 5) + n = 0; + _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.rsp[n]); _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.cum_req); } @@ -1557,11 +1563,17 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) txn->flags |= TX_CON_WANT_TUN; } - /* check for NTML authentication headers in 401 (WWW-Authenticate) and - * 407 (Proxy-Authenticate) responses and set the connection to private + /* Check for NTML authentication headers in 401 (WWW-Authenticate) and + * 407 (Proxy-Authenticate) responses and set the connection to + * private. + * + * Note that this is not performed when using a true multiplexer unless + * connection is already attached to the session as nothing prevents it + * from being shared already by several sessions here. 
*/ srv_conn = sc_conn(s->scb); - if (srv_conn) { + if (srv_conn && + (LIST_INLIST(&srv_conn->sess_el) || strcmp(srv_conn->mux->name, "H1") == 0)) { struct ist hdr; struct http_hdr_ctx ctx; @@ -1611,7 +1623,8 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (objt_server(s->target)) _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors); txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; goto return_prx_cond; return_bad_res: @@ -1894,7 +1907,7 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s * bytes from the server, then this is the right moment. We have * to temporarily assign bytes_out to log what we currently have. */ - if (!LIST_ISEMPTY(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) { + if (!lf_expr_isempty(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) { s->logs.t_close = s->logs.t_data; /* to get a valid end date */ s->logs.bytes_out = htx->data; s->do_log(s); @@ -1930,7 +1943,8 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); if (sess->listener && sess->listener->counters) @@ -2198,7 +2212,8 @@ int http_response_forward_body(struct stream *s, struct channel *res, int an_bit _HA_ATOMIC_INC(&sess->listener->counters->internal_errors); if (objt_server(s->target)) _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors); - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; goto return_error; return_bad_res: @@ -2236,7 +2251,7 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc struct buffer *chunk; struct ist status, reason, location; unsigned int 
flags; - int ret = 1, close = 0; /* Try to keep the connection alive byt default */ + int ret = 1; chunk = alloc_trash_chunk(); if (!chunk) { @@ -2409,9 +2424,6 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc break; } - if (!(txn->req.flags & HTTP_MSGF_BODYLESS) && txn->req.msg_state != HTTP_MSG_DONE) - close = 1; - htx = htx_from_buf(&res->buf); /* Trim any possible response */ channel_htx_truncate(&s->res, htx); @@ -2422,9 +2434,6 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc sl->info.res.status = rule->code; s->txn->status = rule->code; - if (close && !htx_add_header(htx, ist("Connection"), ist("close"))) - goto fail; - if (!htx_add_header(htx, ist("Content-length"), ist("0")) || !htx_add_header(htx, ist("Location"), location)) goto fail; @@ -3877,9 +3886,9 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy ctx->st_code = STAT_STATUS_INIT; ctx->http_px = px; ctx->flags |= uri_auth->flags; - ctx->flags |= STAT_FMT_HTML; /* assume HTML mode by default */ + ctx->flags |= STAT_F_FMT_HTML; /* assume HTML mode by default */ if ((msg->flags & HTTP_MSGF_VER_11) && (txn->meth != HTTP_METH_HEAD)) - ctx->flags |= STAT_CHUNKED; + ctx->flags |= STAT_F_CHUNKED; htx = htxbuf(&req->buf); sl = http_get_stline(htx); @@ -3888,14 +3897,14 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy for (h = lookup; h <= end - 3; h++) { if (memcmp(h, ";up", 3) == 0) { - ctx->flags |= STAT_HIDE_DOWN; + ctx->flags |= STAT_F_HIDE_DOWN; break; } } for (h = lookup; h <= end - 9; h++) { if (memcmp(h, ";no-maint", 9) == 0) { - ctx->flags |= STAT_HIDE_MAINT; + ctx->flags |= STAT_F_HIDE_MAINT; break; } } @@ -3903,7 +3912,7 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (uri_auth->refresh) { for (h = lookup; h <= end - 10; h++) { if (memcmp(h, ";norefresh", 10) == 0) { - ctx->flags |= STAT_NO_REFRESH; + ctx->flags |= 
STAT_F_NO_REFRESH; break; } } @@ -3911,31 +3920,31 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy for (h = lookup; h <= end - 4; h++) { if (memcmp(h, ";csv", 4) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); break; } } for (h = lookup; h <= end - 6; h++) { if (memcmp(h, ";typed", 6) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); - ctx->flags |= STAT_FMT_TYPED; + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); + ctx->flags |= STAT_F_FMT_TYPED; break; } } for (h = lookup; h <= end - 5; h++) { if (memcmp(h, ";json", 5) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); - ctx->flags |= STAT_FMT_JSON; + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); + ctx->flags |= STAT_F_FMT_JSON; break; } } for (h = lookup; h <= end - 12; h++) { if (memcmp(h, ";json-schema", 12) == 0) { - ctx->flags &= ~STAT_FMT_MASK; - ctx->flags |= STAT_JSON_SCHM; + ctx->flags &= ~STAT_F_FMT_MASK; + ctx->flags |= STAT_F_JSON_SCHM; break; } } @@ -4004,7 +4013,7 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (ret) { /* no rule, or the rule matches */ - ctx->flags |= STAT_ADMIN; + ctx->flags |= STAT_F_ADMIN; break; } } @@ -4012,21 +4021,21 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD) appctx->st0 = STAT_HTTP_HEAD; else if (txn->meth == HTTP_METH_POST) { - if (ctx->flags & STAT_ADMIN) { + if (ctx->flags & STAT_F_ADMIN) { appctx->st0 = STAT_HTTP_POST; if (msg->msg_state < HTTP_MSG_DATA) req->analysers |= AN_REQ_HTTP_BODY; } else { /* POST without admin level */ - ctx->flags &= ~STAT_CHUNKED; + ctx->flags &= ~STAT_F_CHUNKED; ctx->st_code = STAT_STATUS_DENY; appctx->st0 = STAT_HTTP_LAST; } } else { /* Unsupported method */ - ctx->flags &= ~STAT_CHUNKED; + ctx->flags &= ~STAT_F_CHUNKED; ctx->st_code = STAT_STATUS_IVAL; appctx->st0 = STAT_HTTP_LAST; 
} @@ -4191,7 +4200,6 @@ void http_perform_server_redirect(struct stream *s, struct stconn *sc) s->txn->status = 302; if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || - !htx_add_header(htx, ist("Connection"), ist("close")) || !htx_add_header(htx, ist("Content-length"), ist("0")) || !htx_add_header(htx, ist("Location"), location)) goto fail; @@ -4473,7 +4481,8 @@ int http_forward_proxy_resp(struct stream *s, int final) size_t data; if (final) { - htx->flags |= HTX_FL_PROXY_RESP; + if (s->txn->server_status == -1) + s->txn->server_status = 0; if (!htx_is_empty(htx) && !http_eval_after_res_rules(s)) return 0; diff --git a/src/http_client.c b/src/http_client.c index d7e50c0..6deff05 100644 --- a/src/http_client.c +++ b/src/http_client.c @@ -190,7 +190,6 @@ err: static int hc_cli_io_handler(struct appctx *appctx) { struct hcli_svc_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct httpclient *hc = ctx->hc; struct http_hdr *hdrs, *hdr; @@ -217,10 +216,7 @@ static int hc_cli_io_handler(struct appctx *appctx) } if (ctx->flags & HC_F_RES_BODY) { - int ret; - - ret = httpclient_res_xfer(hc, sc_ib(sc)); - channel_add_input(sc_ic(sc), ret); /* forward what we put in the buffer channel */ + httpclient_res_xfer(hc, &appctx->outbuf); /* remove the flag if the buffer was emptied */ if (httpclient_data(hc)) @@ -281,11 +277,14 @@ int httpclient_req_gen(struct httpclient *hc, const struct ist url, enum http_me struct htx *htx; int err_code = 0; struct ist meth_ist, vsn; - unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_NORMALIZED_URI | HTX_SL_F_HAS_SCHM; + unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_HAS_SCHM | HTX_SL_F_HAS_AUTHORITY; int i; int foundhost = 0, foundaccept = 0, foundua = 0; - if (!b_alloc(&hc->req.buf)) + if (!(hc->flags & HC_F_HTTPPROXY)) + flags |= HTX_SL_F_NORMALIZED_URI; + + if (!b_alloc(&hc->req.buf, DB_CHANNEL)) goto error; if (meth >= HTTP_METH_OTHER) @@ -403,7 +402,7 @@ int httpclient_req_xfer(struct httpclient *hc, 
struct ist src, int end) int ret = 0; struct htx *htx; - if (!b_alloc(&hc->req.buf)) + if (!b_alloc(&hc->req.buf, DB_CHANNEL)) goto error; htx = htx_from_buf(&hc->req.buf); @@ -704,7 +703,7 @@ void httpclient_applet_io_handler(struct appctx *appctx) uint32_t sz; int ret; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { if (co_data(res)) { htx = htx_from_buf(&res->buf); co_htx_skip(res, htx, co_data(res)); @@ -918,7 +917,7 @@ void httpclient_applet_io_handler(struct appctx *appctx) if (htx_is_empty(htx)) goto out; - if (!b_alloc(&hc->res.buf)) + if (!b_alloc(&hc->res.buf, DB_MUX_TX)) goto out; if (b_full(&hc->res.buf)) @@ -1223,7 +1222,8 @@ struct proxy *httpclient_create_proxy(const char *id) px->timeout.connect = httpclient_timeout_connect; px->timeout.client = TICK_ETERNITY; /* The HTTP Client use the "option httplog" with the global loggers */ - px->conf.logformat_string = httpclient_log_format; + px->logformat.str = httpclient_log_format; + px->logformat.conf.file = strdup("httpclient"); px->http_needed = 1; /* clear HTTP server */ @@ -1343,9 +1343,9 @@ static int httpclient_precheck() httpclient_proxy = httpclient_create_proxy("<HTTPCLIENT>"); if (!httpclient_proxy) - return 1; + return ERR_RETRYABLE; - return 0; + return ERR_NONE; } /* Initialize the logs for every proxy dedicated to the httpclient */ @@ -1376,18 +1376,6 @@ static int httpclient_postcheck_proxy(struct proxy *curproxy) } LIST_APPEND(&curproxy->loggers, &node->list); } - if (curproxy->conf.logformat_string) { - curproxy->conf.args.ctx = ARGC_LOG; - if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat, - LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &errmsg)) { - memprintf(&errmsg, "failed to parse log-format : %s.", errmsg); - err_code |= ERR_ALERT | ERR_FATAL; - goto err; - } - curproxy->conf.args.file = NULL; - 
curproxy->conf.args.line = 0; - } #ifdef USE_OPENSSL /* initialize the SNI for the SSL servers */ @@ -1401,9 +1389,22 @@ static int httpclient_postcheck_proxy(struct proxy *curproxy) /* init the SNI expression */ /* always use the host header as SNI, without the port */ srv_ssl->sni_expr = strdup("req.hdr(host),field(1,:)"); - err_code |= server_parse_sni_expr(srv_ssl, curproxy, &errmsg); - if (err_code & ERR_CODE) { - memprintf(&errmsg, "failed to configure sni: %s.", errmsg); + srv_ssl->ssl_ctx.sni = _parse_srv_expr(srv_ssl->sni_expr, + &curproxy->conf.args, + NULL, 0, NULL); + if (!srv_ssl->ssl_ctx.sni) { + memprintf(&errmsg, "failed to configure sni."); + err_code |= ERR_ALERT | ERR_FATAL; + goto err; + } + + srv_ssl->pool_conn_name = strdup(srv_ssl->sni_expr); + srv_ssl->pool_conn_name_expr = _parse_srv_expr(srv_ssl->pool_conn_name, + &curproxy->conf.args, + NULL, 0, NULL); + if (!srv_ssl->pool_conn_name_expr) { + memprintf(&errmsg, "failed to configure pool-conn-name."); + err_code |= ERR_ALERT | ERR_FATAL; goto err; } } diff --git a/src/http_fetch.c b/src/http_fetch.c index 1f3e4a0..ad1e8c5 100644 --- a/src/http_fetch.c +++ b/src/http_fetch.c @@ -36,6 +36,7 @@ #include <haproxy/sample.h> #include <haproxy/sc_strm.h> #include <haproxy/stream.h> +#include <haproxy/log.h> #include <haproxy/tools.h> #include <haproxy/version.h> @@ -314,7 +315,7 @@ struct htx *smp_prefetch_htx(struct sample *smp, struct channel *chn, struct che else { if (txn->status == -1) txn->status = sl->info.res.status; - if (!(htx->flags & HTX_FL_PROXY_RESP) && txn->server_status == -1) + if (txn->server_status == -1) txn->server_status = sl->info.res.status; } if (sl->flags & HTX_SL_F_VER_11) @@ -477,7 +478,7 @@ static int smp_fetch_uniqueid(const struct arg *args, struct sample *smp, const { struct ist unique_id; - if (LIST_ISEMPTY(&smp->sess->fe->format_unique_id)) + if (lf_expr_isempty(&smp->sess->fe->format_unique_id)) return 0; if (!smp->strm) diff --git a/src/http_htx.c 
b/src/http_htx.c index 004d343..36356ed 100644 --- a/src/http_htx.c +++ b/src/http_htx.c @@ -1117,7 +1117,6 @@ error: void release_http_reply(struct http_reply *http_reply) { - struct logformat_node *lf, *lfb; struct http_reply_hdr *hdr, *hdrb; if (!http_reply) @@ -1126,12 +1125,7 @@ void release_http_reply(struct http_reply *http_reply) ha_free(&http_reply->ctype); list_for_each_entry_safe(hdr, hdrb, &http_reply->hdrs, list) { LIST_DELETE(&hdr->list); - list_for_each_entry_safe(lf, lfb, &hdr->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -1141,14 +1135,8 @@ void release_http_reply(struct http_reply *http_reply) } else if (http_reply->type == HTTP_REPLY_RAW) chunk_destroy(&http_reply->body.obj); - else if (http_reply->type == HTTP_REPLY_LOGFMT) { - list_for_each_entry_safe(lf, lfb, &http_reply->body.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + else if (http_reply->type == HTTP_REPLY_LOGFMT) + lf_expr_deinit(&http_reply->body.fmt); free(http_reply); } @@ -1497,7 +1485,6 @@ int http_check_http_reply(struct http_reply *reply, struct proxy *px, char **err struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px, int default_status, char **errmsg) { - struct logformat_node *lf, *lfb; struct http_reply *reply = NULL; struct http_reply_hdr *hdr, *hdrb; struct stat stat; @@ -1682,7 +1669,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc fd = -1; obj[objlen] = '\0'; reply->type = HTTP_REPLY_LOGFMT; - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); cur_arg++; } else if (strcmp(args[cur_arg], "lf-string") == 0) { @@ -1699,7 +1686,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc obj = strdup(args[cur_arg]); objlen = strlen(args[cur_arg]); reply->type = 
HTTP_REPLY_LOGFMT; - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); cur_arg++; } else if (strcmp(args[cur_arg], "hdr") == 0) { @@ -1722,7 +1709,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc goto error; } LIST_APPEND(&reply->hdrs, &hdr->list); - LIST_INIT(&hdr->value); + lf_expr_init(&hdr->value); hdr->name = ist(strdup(args[cur_arg])); if (!isttest(hdr->name)) { memprintf(errmsg, "out of memory"); @@ -1731,9 +1718,6 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc if (!parse_logformat_string(args[cur_arg+1], px, &hdr->value, LOG_OPT_HTTP, cap, errmsg)) goto error; - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; cur_arg += 2; } else @@ -1778,12 +1762,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc px->conf.args.file, px->conf.args.line); list_for_each_entry_safe(hdr, hdrb, &reply->hdrs, list) { LIST_DELETE(&hdr->list); - list_for_each_entry_safe(lf, lfb, &hdr->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -1811,7 +1790,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc } } else if (reply->type == HTTP_REPLY_LOGFMT) { /* log-format payload using 'lf-file' of 'lf-string' parameter */ - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); if ((reply->status == 204 || reply->status == 304)) { memprintf(errmsg, "No body expected for %d responses", reply->status); goto error; @@ -1822,10 +1801,6 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc } if (!parse_logformat_string(obj, px, &reply->body.fmt, LOG_OPT_HTTP, cap, errmsg)) goto error; - - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; } free(obj); 
@@ -1853,8 +1828,9 @@ int http_scheme_based_normalize(struct htx *htx) { struct http_hdr_ctx ctx; struct htx_sl *sl; - struct ist uri, scheme, authority, host, port; + struct ist uri, scheme, authority, host, port, path; struct http_uri_parser parser; + int normalize = 0; sl = http_get_stline(htx); @@ -1871,14 +1847,21 @@ int http_scheme_based_normalize(struct htx *htx) /* Extract the port if present in authority */ authority = http_parse_authority(&parser, 1); + path = http_parse_path(&parser); port = http_get_host_port(authority); - if (!isttest(port)) { - /* if no port found, no normalization to proceed */ - return 0; + if (!isttest(port) || !http_is_default_port(scheme, port)) + host = authority; + else { + host = isttrim(authority, istlen(authority) - istlen(port) - 1); + normalize = 1; + } + + if (!isttest(path)) { + path = ist("/"); + normalize = 1; } - host = isttrim(authority, istlen(authority) - istlen(port) - 1); - if (http_is_default_port(scheme, port)) { + if (normalize) { /* reconstruct the uri with removal of the port */ struct buffer *temp = get_trash_chunk(); struct ist meth, vsn; @@ -1894,8 +1877,8 @@ int http_scheme_based_normalize(struct htx *htx) /* reconstruct uri without port */ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr); chunk_istcat(temp, host); - chunk_memcat(temp, istend(authority), istend(uri) - istend(authority)); - uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */ + chunk_istcat(temp, path); + uri = ist2(temp->area + meth.len + vsn.len, host.len + path.len + authority.ptr - uri.ptr); /* uri */ http_replace_stline(htx, meth, uri, vsn); diff --git a/src/http_rules.c b/src/http_rules.c index 192f0c7..6ceacdf 100644 --- a/src/http_rules.c +++ b/src/http_rules.c @@ -320,17 +320,10 @@ struct act_rule *parse_http_after_res_cond(const char **args, const char *file, /* completely free redirect rule */ void http_free_redirect_rule(struct redirect_rule *rdr) { - struct logformat_node *lf, *lfb; - 
free_acl_cond(rdr->cond); free(rdr->rdr_str); free(rdr->cookie_str); - list_for_each_entry_safe(lf, lfb, &rdr->rdr_fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rdr->rdr_fmt); free(rdr); } @@ -447,7 +440,7 @@ struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, st if (!rule) goto out_of_memory; rule->cond = cond; - LIST_INIT(&rule->rdr_fmt); + lf_expr_init(&rule->rdr_fmt); if (!use_fmt) { /* old-style static redirect rule */ @@ -473,9 +466,6 @@ struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, st if (!parse_logformat_string(destination, curproxy, &rule->rdr_fmt, LOG_OPT_HTTP, cap, errmsg)) { goto err; } - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; } } diff --git a/src/lb_chash.c b/src/lb_chash.c index 4e8fb15..b3e472e 100644 --- a/src/lb_chash.c +++ b/src/lb_chash.c @@ -21,8 +21,9 @@ #include <haproxy/backend.h> #include <haproxy/errors.h> #include <haproxy/queue.h> -#include <haproxy/server-t.h> +#include <haproxy/server.h> #include <haproxy/tools.h> +#include <haproxy/xxhash.h> /* Return next tree node after <node> which must still be in the tree, or be * NULL. Lookup wraps around the end to the beginning. If the next node is the @@ -58,6 +59,77 @@ static inline void chash_dequeue_srv(struct server *s) } } +/* Compute a key that can be used to insert a node into the CHASH tree. Servers + * have a base key, which can be computed in several ways (see + * chash_compute_server_key) and this function uses that seed to generate hash + * keys for however many nodes need to be inserted into the tree. + */ +static inline u32 chash_compute_node_key(struct server *s, unsigned node_index) +{ + return full_hash(s->lb_server_key + node_index); +} + +/* Compute the base server key that will be used to compute node keys. 
Servers + * may be configured to determine their hashes either from their ID, address, or + * address+port; the latter options allow independent HAProxy instances to agree + * on routing decisions, regardless of their order in the server list (which may + * be arbitrary, since it could depend on factors such as the order of entries + * in a DNS SRV record). If an address is not known or if the server is + * configured with `hash-key id` (the default) then the key will be determined + * from the server's puid. + */ +static inline u32 chash_compute_server_key(struct server *s) +{ + enum srv_hash_key hash_key = s->hash_key; + struct server_inetaddr srv_addr; + u32 key; + + /* If hash-key is addr or addr-port then we need the address, but if we + * can't determine the address then we fall back on hashing the puid. + */ + switch (hash_key) { + case SRV_HASH_KEY_ADDR: + case SRV_HASH_KEY_ADDR_PORT: + server_get_inetaddr(s, &srv_addr); + if (srv_addr.family != AF_INET && srv_addr.family != AF_INET6) { + hash_key = SRV_HASH_KEY_ID; + } + break; + + default: + break; + } + + if (hash_key == SRV_HASH_KEY_ADDR_PORT) { + key = full_hash(srv_addr.port.svc); + } else { + key = 0; + } + + switch (hash_key) { + case SRV_HASH_KEY_ADDR_PORT: + case SRV_HASH_KEY_ADDR: + switch (srv_addr.family) { + case AF_INET: + key = full_hash(key + srv_addr.addr.v4.s_addr); + break; + case AF_INET6: + key = XXH32(srv_addr.addr.v6.s6_addr, 16, key); + break; + default: + break; + } + break; + + case SRV_HASH_KEY_ID: + default: + key = full_hash(s->puid); + break; + } + + return key; +} + /* Adjust the number of entries of a server in its tree. The server must appear * as many times as its weight indicates it. If it's there too often, we remove * the last occurrences. If it's not there enough, we add more occurrences. 
To @@ -67,6 +139,15 @@ static inline void chash_dequeue_srv(struct server *s) */ static inline void chash_queue_dequeue_srv(struct server *s) { + u32 server_key = chash_compute_server_key(s); + + /* If the server key changed then we must rehash all the nodes. */ + if (server_key != s->lb_server_key) { + chash_dequeue_srv(s); + s->lb_nodes_tot = 0; + s->lb_server_key = server_key; + } + while (s->lb_nodes_now > s->next_eweight) { if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway s->lb_nodes_now = s->lb_nodes_tot; @@ -95,7 +176,7 @@ static inline void chash_queue_dequeue_srv(struct server *s) (s->next_eweight - s->lb_nodes_tot) * sizeof(*s->lb_nodes)); for (j = s->lb_nodes_tot; j < s->next_eweight; j++) { s->lb_nodes[j].server = s; - s->lb_nodes[j].node.key = full_hash(s->puid * SRV_EWGHT_RANGE + j); + s->lb_nodes[j].node.key = chash_compute_node_key(s, j); } s->lb_nodes_tot = s->next_eweight; } @@ -238,9 +319,6 @@ static void chash_update_server_weight(struct server *srv) int old_state, new_state; struct proxy *p = srv->proxy; - if (!srv_lb_status_changed(srv)) - return; - /* If changing the server's weight changes its state, we simply apply * the procedures we already have for status change. 
If the state * remains down, the server is not in any tree, so it's as easy as @@ -505,9 +583,10 @@ int chash_init_server_tree(struct proxy *p) ha_alert("failed to allocate lb_nodes for server %s.\n", srv->id); return -1; } + srv->lb_server_key = chash_compute_server_key(srv); for (node = 0; node < srv->lb_nodes_tot; node++) { srv->lb_nodes[node].server = srv; - srv->lb_nodes[node].node.key = full_hash(srv->puid * SRV_EWGHT_RANGE + node); + srv->lb_nodes[node].node.key = chash_compute_node_key(srv, node); } if (srv_currently_usable(srv)) diff --git a/src/lb_ss.c b/src/lb_ss.c new file mode 100644 index 0000000..4af031b --- /dev/null +++ b/src/lb_ss.c @@ -0,0 +1,183 @@ +/* + * sticky load-balancing + * + * Copyright 2024 HAProxy Technologies + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <haproxy/api.h> +#include <haproxy/backend.h> +#include <haproxy/lb_ss.h> +#include <haproxy/server-t.h> + +/* this function updates the stick server according to server <srv>'s new state. + * + * The server's lock must be held. The lbprm's lock will be used. 
+ */ +static void ss_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (!srv_lb_status_changed(srv)) + return; + + if (srv_willbe_usable(srv)) + goto out_update_state; + + HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); + + if (!srv_currently_usable(srv)) + /* server was already down */ + goto out_update_backend; + + if (srv->flags & SRV_F_BACKUP) { + p->lbprm.tot_wbck -= srv->cur_eweight; + p->srv_bck--; + } else { + p->lbprm.tot_wact -= srv->cur_eweight; + p->srv_act--; + } + if (srv == p->lbprm.ss.srv) { + /* sticked server is down, elect a new server + * that we will be sticking on. + */ + recalc_server_ss(p); + } + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); + + out_update_state: + srv_lb_commit_status(srv); +} + +/* This function updates the stick server according to server <srv>'s new state. + * + * The server's lock must be held. The lbprm's lock will be used. 
+ */ +static void ss_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (!srv_lb_status_changed(srv)) + return; + + if (!srv_willbe_usable(srv)) + goto out_update_state; + + HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); + + if (srv_currently_usable(srv)) + /* server was already up */ + goto out_update_backend; + + if (srv->flags & SRV_F_BACKUP) { + p->lbprm.tot_wbck += srv->next_eweight; + p->srv_bck++; + } else { + p->lbprm.tot_wact += srv->next_eweight; + p->srv_act++; + } + if (!p->lbprm.ss.srv || + ((p->lbprm.ss.srv->flags & SRV_F_BACKUP) && !(srv->flags & SRV_F_BACKUP))) { + /* we didn't have a server or were sticking on a backup server, + * but now we have an active server, let's switch to it + */ + p->lbprm.ss.srv = srv; + } + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); + + out_update_state: + srv_lb_commit_status(srv); +} + +/* This function elects a new stick server for proxy px. + * + * The lbprm's lock must be held. + */ +void recalc_server_ss(struct proxy *px) +{ + struct server *cur, *first; + int flag; + + if (!px->lbprm.tot_used) + return; /* no server */ + + /* here we *know* that we have some servers */ + if (px->srv_act) + flag = 0; + else + flag = SRV_F_BACKUP; + + first = NULL; + + for (cur = px->srv; cur; cur = cur->next) { + if ((cur->flags & SRV_F_BACKUP) == flag && + srv_willbe_usable(cur)) { + first = cur; + break; + } + } + px->lbprm.ss.srv = first; +} + +/* This function is responsible for preparing sticky LB algorithm. + * It should be called only once per proxy, at config time. 
+ */ +void init_server_ss(struct proxy *p) +{ + struct server *srv; + + p->lbprm.set_server_status_up = ss_set_server_status_up; + p->lbprm.set_server_status_down = ss_set_server_status_down; + p->lbprm.update_server_eweight = NULL; + + if (!p->srv) + return; + + for (srv = p->srv; srv; srv = srv->next) { + srv->next_eweight = 1; /* ignore weights, all servers have the same weight */ + srv_lb_commit_status(srv); + } + + /* recounts servers and their weights */ + recount_servers(p); + update_backend_weight(p); + recalc_server_ss(p); +} + +/* + * This function returns the server that we're sticking on. If any server + * is found, it will be returned. If no valid server is found, NULL is + * returned. + * + * The lbprm's lock will be used. + */ +struct server *ss_get_server(struct proxy *px) +{ + struct server *srv = NULL; + + HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); + srv = px->lbprm.ss.srv; + HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); + return srv; +} diff --git a/src/linuxcap.c b/src/linuxcap.c index 4a2a3ab..63a510f 100644 --- a/src/linuxcap.c +++ b/src/linuxcap.c @@ -40,11 +40,20 @@ static const struct { #ifdef CAP_NET_BIND_SERVICE { CAP_NET_BIND_SERVICE, "cap_net_bind_service" }, #endif +#ifdef CAP_SYS_ADMIN + { CAP_SYS_ADMIN, "cap_sys_admin" }, +#endif /* must be last */ { 0, 0 } }; /* provided by sys/capability.h on some distros */ +static inline int capget(cap_user_header_t hdrp, const cap_user_data_t datap) +{ + return syscall(SYS_capget, hdrp, datap); +} + +/* provided by sys/capability.h on some distros */ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) { return syscall(SYS_capset, hdrp, datap); @@ -53,6 +62,86 @@ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) /* defaults to zero, i.e. we don't keep any cap after setuid() */ static uint32_t caplist; +/* try to check if CAP_NET_ADMIN, CAP_NET_RAW or CAP_SYS_ADMIN are in the + * process Effective set in the case when euid is non-root. 
If there is a + * match, LSTCHK_NETADM or LSTCHK_SYSADM is unset respectively from + * global.last_checks to avoid warning due to global.last_checks verifications + * later at the process init stage. + * If there is no any supported by haproxy capability in the process Effective + * set, try to check the process Permitted set. In this case we promote from + * Permitted set to Effective only the capabilities, that were marked by user + * via 'capset' keyword in the global section (caplist). If there is match with + * caplist and CAP_NET_ADMIN/CAP_NET_RAW or CAP_SYS_ADMIN are in this list, + * LSTCHK_NETADM or/and LSTCHK_SYSADM will be unset by the same reason. + * We do this only if the current euid is non-root and there is no global.uid. + * Otherwise, the process will continue either to run under root, or it will do + * a transition to unprivileged user later in prepare_caps_for_setuid(), + * which specially manages its capabilities in that case. + * Always returns 0. Diagnostic warnings will be emitted only, if + * LSTCHK_NETADM/LSTCHK_SYSADM is presented in global.last_checks and some + * failures are encountered. + */ +int prepare_caps_from_permitted_set(int from_uid, int to_uid, const char *program_name) +{ + struct __user_cap_data_struct start_cap_data = { }; + struct __user_cap_header_struct cap_hdr = { + .pid = 0, /* current process */ + .version = _LINUX_CAPABILITY_VERSION_1, + }; + + /* started as root */ + if (!from_uid) + return 0; + + /* will change ruid and euid later in set_identity() */ + if (to_uid) + return 0; + + /* first, let's check if CAP_NET_ADMIN or CAP_NET_RAW is already in + * the process effective set. This may happen, when administrator sets + * these capabilities and the file effective bit on haproxy binary via + * setcap, see capabilities man page for details. 
+ */ + if (capget(&cap_hdr, &start_cap_data) == -1) { + if (global.last_checks & (LSTCHK_NETADM | LSTCHK_SYSADM)) + ha_diag_warning("Failed to get process capabilities using capget(): %s. " + "Can't use capabilities that might be set on %s binary " + "by administrator.\n", strerror(errno), program_name); + return 0; + } + + if (start_cap_data.effective & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) { + global.last_checks &= ~LSTCHK_NETADM; + return 0; + } + + if (start_cap_data.effective & ((1 << CAP_SYS_ADMIN))) { + global.last_checks &= ~LSTCHK_SYSADM; + return 0; + } + + /* second, try to check process permitted set, in this case caplist is + * necessary. Allows to put cap_net_bind_service in process effective + * set, if it is in the caplist and also presented in the binary + * permitted set. + */ + if (caplist && start_cap_data.permitted & caplist) { + start_cap_data.effective |= start_cap_data.permitted & caplist; + if (capset(&cap_hdr, &start_cap_data) == 0) { + if (caplist & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) + global.last_checks &= ~LSTCHK_NETADM; + if (caplist & (1 << CAP_SYS_ADMIN)) + global.last_checks &= ~LSTCHK_SYSADM; + } else if (global.last_checks & (LSTCHK_NETADM|LSTCHK_SYSADM)) { + ha_diag_warning("Failed to put capabilities from caplist in %s " + "process Effective capabilities set using capset(): %s\n", + program_name, strerror(errno)); + } + } + + return 0; +} + /* try to apply capabilities before switching UID from <from_uid> to <to_uid>. * In practice we need to do this in 4 steps: * - set PR_SET_KEEPCAPS to preserve caps across the final setuid() @@ -61,7 +150,8 @@ static uint32_t caplist; * - set the effective and permitted caps again * - then the caller can safely call setuid() * On success LSTCHK_NETADM is unset from global.last_checks, if CAP_NET_ADMIN - * or CAP_NET_RAW was found in the caplist from config. + * or CAP_NET_RAW was found in the caplist from config. 
Same for + * LSTCHK_SYSADM, if CAP_SYS_ADMIN was found in the caplist from config. * We don't do this if the current euid is not zero or if the target uid * is zero. Returns 0 on success, negative on failure. Alerts may be emitted. */ @@ -107,6 +197,9 @@ int prepare_caps_for_setuid(int from_uid, int to_uid) if (caplist & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) global.last_checks &= ~LSTCHK_NETADM; + if (caplist & (1 << CAP_SYS_ADMIN)) + global.last_checks &= ~LSTCHK_SYSADM; + /* all's good */ return 0; } diff --git a/src/listener.c b/src/listener.c index 75e164a..a348558 100644 --- a/src/listener.c +++ b/src/listener.c @@ -27,6 +27,7 @@ #include <haproxy/freq_ctr.h> #include <haproxy/frontend.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/list.h> #include <haproxy/listener.h> #include <haproxy/log.h> @@ -443,9 +444,9 @@ int default_resume_listener(struct listener *l) err = l->rx.proto->fam->bind(&l->rx, &errmsg); if (err != ERR_NONE) { if (err & ERR_WARN) - ha_warning("Resuming listener: %s\n", errmsg); + ha_warning("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, errmsg); else if (err & ERR_ALERT) - ha_alert("Resuming listener: %s\n", errmsg); + ha_alert("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, errmsg); ha_free(&errmsg); if (err & (ERR_FATAL | ERR_ABORT)) { ret = 0; @@ -460,9 +461,9 @@ int default_resume_listener(struct listener *l) BUG_ON(!l->rx.proto->listen); err = l->rx.proto->listen(l, msg, sizeof(msg)); if (err & ERR_ALERT) - ha_alert("Resuming listener: %s\n", msg); + ha_alert("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, msg); else if (err & ERR_WARN) - ha_warning("Resuming listener: %s\n", msg); + ha_warning("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, msg); if (err & (ERR_FATAL | ERR_ABORT)) { ret = 0; @@ -816,6 +817,8 @@ int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss, if (fd != -1) l->rx.flags |= RX_F_INHERITED; + guid_init(&l->guid); 
+ l->extra_counters = NULL; HA_RWLOCK_INIT(&l->lock); @@ -913,6 +916,7 @@ struct listener *clone_listener(struct listener *src) goto oom1; memcpy(l, src, sizeof(*l)); + l->luid = 0; // don't dup the listener's ID! if (l->name) { l->name = strdup(l->name); if (!l->name) @@ -1066,11 +1070,11 @@ void listener_accept(struct listener *l) } #endif if (p && p->fe_sps_lim) { - int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0); + int max = freq_ctr_remain(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0); if (unlikely(!max)) { /* frontend accept rate limit was reached */ - expire = tick_add(now_ms, next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0)); + expire = tick_add(now_ms, next_event_delay(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0)); goto limit_proxy; } @@ -1541,7 +1545,7 @@ void listener_accept(struct listener *l) dequeue_all_listeners(); if (p && !MT_LIST_ISEMPTY(&p->listener_queue) && - (!p->fe_sps_lim || freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0) > 0)) + (!p->fe_sps_lim || freq_ctr_remain(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0) > 0)) dequeue_proxy_listeners(p); } return; @@ -1600,14 +1604,14 @@ void listener_release(struct listener *l) dequeue_all_listeners(); if (fe && !MT_LIST_ISEMPTY(&fe->listener_queue) && - (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_sess_per_sec, fe->fe_sps_lim, 0) > 0)) + (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_counters.sess_per_sec, fe->fe_sps_lim, 0) > 0)) dequeue_proxy_listeners(fe); else { unsigned int wait; int expire = TICK_ETERNITY; if (fe->task && fe->fe_sps_lim && - (wait = next_event_delay(&fe->fe_sess_per_sec,fe->fe_sps_lim, 0))) { + (wait = next_event_delay(&fe->fe_counters.sess_per_sec,fe->fe_sps_lim, 0))) { /* we're blocking because a limit was reached on the number of * requests/s on the frontend. 
We want to re-check ASAP, which * means in 1 ms before estimated expiration date, because the @@ -1713,8 +1717,8 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) else { if (fe != global.cli_fe) ha_diag_warning("[%s:%d]: Disabling per-thread sharding for listener in" - " %s '%s' because SO_REUSEPORT is disabled\n", - bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id); + " %s '%s' because SO_REUSEPORT is disabled for %s protocol.\n", + bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id, li->rx.proto->name); shards = 1; } } @@ -1727,8 +1731,8 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) /* We also need to check if an explicit shards count was set and cannot be honored */ if (shards > 1 && !protocol_supports_flag(li->rx.proto, PROTO_F_REUSEPORT_SUPPORTED)) { - ha_warning("[%s:%d]: Disabling sharding for listener in %s '%s' because SO_REUSEPORT is disabled\n", - bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id); + ha_warning("[%s:%d]: Disabling sharding for listener in %s '%s' because SO_REUSEPORT is disabled for %s protocol.\n", + bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id, li->rx.proto->name); shards = 1; } @@ -1807,6 +1811,12 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) *err_code |= ERR_FATAL | ERR_ALERT; return cfgerr; } + /* assign the ID to the first one only */ + new_li->luid = new_li->conf.id.key = tmp_li->luid; + tmp_li->luid = 0; + eb32_delete(&tmp_li->conf.id); + if (tmp_li->luid) + eb32_insert(&fe->conf.used_listener_id, &new_li->conf.id); new_li = tmp_li; } } @@ -1825,6 +1835,12 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) *err_code |= ERR_FATAL | ERR_ALERT; return cfgerr; } + /* assign the ID to the first one only */ + new_li->luid = new_li->conf.id.key = li->luid; + li->luid = 0; + eb32_delete(&li->conf.id); + if (li->luid) + eb32_insert(&fe->conf.used_listener_id, 
&new_li->conf.id); } } @@ -1832,6 +1848,43 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) return cfgerr; } +/* Generate and insert unique GUID for each listeners of <bind_conf> instance + * if GUID prefix is defined. + * + * Returns 0 on success else non-zero. + */ +int bind_generate_guid(struct bind_conf *bind_conf) +{ + struct listener *l; + char *guid_err = NULL; + + if (!bind_conf->guid_prefix) + return 0; + + list_for_each_entry(l, &bind_conf->listeners, by_bind) { + if (bind_conf->guid_idx == (size_t)-1) { + ha_alert("[%s:%d] : error on GUID generation : Too many listeners.\n", + bind_conf->file, bind_conf->line); + return 1; + } + + chunk_printf(&trash, "%s-%lld", bind_conf->guid_prefix, + (ullong)bind_conf->guid_idx); + + if (guid_insert(&l->obj_type, b_head(&trash), &guid_err)) { + ha_alert("[%s:%d] : error on GUID generation : %s. " + "You may fix it by adjusting guid-prefix.\n", + bind_conf->file, bind_conf->line, guid_err); + ha_free(&guid_err); + return 1; + } + + ++bind_conf->guid_idx; + } + + return 0; +} + /* * Registers the bind keyword list <kwl> as a list of valid keywords for next * parsing sessions. 
@@ -1975,6 +2028,9 @@ struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file, #endif LIST_INIT(&bind_conf->listeners); + bind_conf->guid_prefix = NULL; + bind_conf->guid_idx = 0; + bind_conf->rhttp_srvname = NULL; return bind_conf; @@ -2082,6 +2138,26 @@ static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct return 0; } +/* parse the "guid-prefix" bind keyword */ +static int bind_parse_guid_prefix(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) +{ + char *prefix = NULL; + + if (!*args[cur_arg + 1]) { + memprintf(err, "'%s' : expects an argument", args[cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + prefix = strdup(args[cur_arg + 1]); + if (!prefix) { + memprintf(err, "'%s' : out of memory", args[cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + conf->guid_prefix = prefix; + return 0; +} + /* parse the "id" bind keyword */ static int bind_parse_id(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { @@ -2225,8 +2301,8 @@ int bind_parse_args_list(struct bind_conf *bind_conf, char **args, int cur_arg, #if (!defined(IP_PKTINFO) && !defined(IP_RECVDSTADDR)) || !defined(IPV6_RECVPKTINFO) list_for_each_entry(l, &bind_conf->listeners, by_bind) { if (++listener_count > 1 || !is_inet_addr(&l->rx.addr)) { - ha_diag_warning("parsing [%s:%d] : '%s %s' in section '%s' : UDP binding on multiple addresses without IP_PKTINFO or equivalent support may be unreliable.\n", - file, linenum, args[0], args[1], section); + ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : UDP binding on multiple addresses without IP_PKTINFO or equivalent support may be unreliable.\n", + file, linenum, args[0], args[1], section); break; } } @@ -2486,6 +2562,7 @@ static struct bind_kw_list bind_kws = { "ALL", { }, { { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1, 0 }, /* enable NetScaler Client IP insertion protocol */ { "accept-proxy", bind_parse_accept_proxy, 0, 0 }, /* enable PROXY 
protocol */ { "backlog", bind_parse_backlog, 1, 0 }, /* set backlog of listening socket */ + { "guid-prefix", bind_parse_guid_prefix, 1, 1 }, /* set guid of listening socket */ { "id", bind_parse_id, 1, 1 }, /* set id of listening socket */ { "maxconn", bind_parse_maxconn, 1, 0 }, /* set maxconn of listening socket */ { "name", bind_parse_name, 1, 1 }, /* set name of listening socket */ @@ -33,6 +33,10 @@ #include <haproxy/http.h> #include <haproxy/http_ana.h> #include <haproxy/listener.h> +#include <haproxy/lb_chash.h> +#include <haproxy/lb_fwrr.h> +#include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/log.h> #include <haproxy/proxy.h> #include <haproxy/sample.h> @@ -45,6 +49,7 @@ #include <haproxy/time.h> #include <haproxy/hash.h> #include <haproxy/tools.h> +#include <haproxy/vecpair.h> /* global recv logs counter */ int cum_log_messages; @@ -90,7 +95,9 @@ static const struct log_fmt_st log_formats[LOG_FORMATS] = { * that the byte should be escaped. Be careful to always pass bytes from 0 to * 255 exclusively to the macros. 
*/ +long no_escape_map[(256/8) / sizeof(long)]; long rfc5424_escape_map[(256/8) / sizeof(long)]; +long json_escape_map[(256/8) / sizeof(long)]; long hdr_encode_map[(256/8) / sizeof(long)]; long url_encode_map[(256/8) / sizeof(long)]; long http_encode_map[(256/8) / sizeof(long)]; @@ -112,21 +119,81 @@ const char *log_levels[NB_LOG_LEVELS] = { const char sess_term_cond[16] = "-LcCsSPRIDKUIIII"; /* normal, Local, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */ const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */ +const struct buffer empty = { }; -/* log_format */ -struct logformat_type { - char *name; - int type; - int mode; - int lw; /* logwait bitsfield */ - int (*config_callback)(struct logformat_node *node, struct proxy *curproxy); -}; - int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy); -/* log_format variable names */ -static const struct logformat_type logformat_keywords[] = { +/* logformat alias types (internal use) */ +enum logformat_alias_type { + LOG_FMT_GLOBAL, + LOG_FMT_CLIENTIP, + LOG_FMT_CLIENTPORT, + LOG_FMT_BACKENDIP, + LOG_FMT_BACKENDPORT, + LOG_FMT_FRONTENDIP, + LOG_FMT_FRONTENDPORT, + LOG_FMT_SERVERPORT, + LOG_FMT_SERVERIP, + LOG_FMT_COUNTER, + LOG_FMT_LOGCNT, + LOG_FMT_PID, + LOG_FMT_DATE, + LOG_FMT_DATEGMT, + LOG_FMT_DATELOCAL, + LOG_FMT_TS, + LOG_FMT_MS, + LOG_FMT_FRONTEND, + LOG_FMT_FRONTEND_XPRT, + LOG_FMT_BACKEND, + LOG_FMT_SERVER, + LOG_FMT_BYTES, + LOG_FMT_BYTES_UP, + LOG_FMT_Ta, + LOG_FMT_Th, + LOG_FMT_Ti, + LOG_FMT_TQ, + LOG_FMT_TW, + LOG_FMT_TC, + LOG_FMT_Tr, + LOG_FMT_tr, + LOG_FMT_trg, + LOG_FMT_trl, + LOG_FMT_TR, + LOG_FMT_TD, + LOG_FMT_TT, + LOG_FMT_TU, + LOG_FMT_STATUS, + LOG_FMT_CCLIENT, + LOG_FMT_CSERVER, + LOG_FMT_TERMSTATE, + LOG_FMT_TERMSTATE_CK, + LOG_FMT_ACTCONN, + LOG_FMT_FECONN, + LOG_FMT_BECONN, + LOG_FMT_SRVCONN, + LOG_FMT_RETRIES, + LOG_FMT_SRVQUEUE, + LOG_FMT_BCKQUEUE, + LOG_FMT_HDRREQUEST, + 
LOG_FMT_HDRRESPONS, + LOG_FMT_HDRREQUESTLIST, + LOG_FMT_HDRRESPONSLIST, + LOG_FMT_REQ, + LOG_FMT_HTTP_METHOD, + LOG_FMT_HTTP_URI, + LOG_FMT_HTTP_PATH, + LOG_FMT_HTTP_PATH_ONLY, + LOG_FMT_HTTP_QUERY, + LOG_FMT_HTTP_VERSION, + LOG_FMT_HOSTNAME, + LOG_FMT_UNIQUEID, + LOG_FMT_SSL_CIPHER, + LOG_FMT_SSL_VERSION, +}; + +/* log_format alias names */ +static const struct logformat_alias logformat_aliases[] = { { "o", LOG_FMT_GLOBAL, PR_MODE_TCP, 0, NULL }, /* global option */ /* please keep these lines sorted ! */ @@ -208,6 +275,36 @@ char *log_format = NULL; */ char default_rfc5424_sd_log_format[] = "- "; +/* returns true if the input logformat string is one of the default ones declared + * above + */ +static inline int logformat_str_isdefault(const char *str) +{ + return str == httpclient_log_format || + str == default_http_log_format || + str == default_https_log_format || + str == clf_http_log_format || + str == default_tcp_log_format || + str == default_rfc5424_sd_log_format; +} + +/* free logformat str if it is not a default (static) one */ +static inline void logformat_str_free(char **str) +{ + if (!logformat_str_isdefault(*str)) + ha_free(str); +} + +/* duplicate and return logformat str if it is not a default (static) + * one, else return the original one + */ +static inline char *logformat_str_dup(char *str) +{ + if (logformat_str_isdefault(str)) + return str; + return strdup(str); +} + /* total number of dropped logs */ unsigned int dropped_logs = 0; @@ -221,17 +318,20 @@ THREAD_LOCAL char *logline = NULL; */ THREAD_LOCAL char *logline_rfc5424 = NULL; -struct logformat_var_args { +struct logformat_node_args { char *name; int mask; }; -struct logformat_var_args var_args_list[] = { +struct logformat_node_args node_args_list[] = { // global { "M", LOG_OPT_MANDATORY }, { "Q", LOG_OPT_QUOTE }, { "X", LOG_OPT_HEXA }, { "E", LOG_OPT_ESC }, + { "bin", LOG_OPT_BIN }, + { "json", LOG_OPT_ENCODE_JSON }, + { "cbor", LOG_OPT_ENCODE_CBOR }, { 0, 0 } }; @@ -240,17 +340,19 @@ 
struct logformat_var_args var_args_list[] = { */ int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy) { - curproxy->options2 |= PR_O2_SRC_ADDR; + if ((curproxy->flags & PR_FL_CHECKED)) + return 0; - return 0; + curproxy->options2 |= PR_O2_SRC_ADDR; + return 1; } /* - * Parse args in a logformat_var. Returns 0 in error + * Parse args in a logformat_node. Returns 0 in error * case, otherwise, it returns 1. */ -int parse_logformat_var_args(char *args, struct logformat_node *node, char **err) +int parse_logformat_node_args(char *args, struct logformat_node *node, char **err) { int i = 0; int end = 0; @@ -258,7 +360,7 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err char *sp = NULL; // start pointer if (args == NULL) { - memprintf(err, "internal error: parse_logformat_var_args() expects non null 'args'"); + memprintf(err, "internal error: parse_logformat_node_args() expects non null 'args'"); return 0; } @@ -279,13 +381,19 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err if (*args == '\0' || *args == ',') { *args = '\0'; - for (i = 0; sp && var_args_list[i].name; i++) { - if (strcmp(sp, var_args_list[i].name) == 0) { + for (i = 0; sp && node_args_list[i].name; i++) { + if (strcmp(sp, node_args_list[i].name) == 0) { if (flags == 1) { - node->options |= var_args_list[i].mask; + /* Ensure we don't mix encoding types, existing + * encoding type prevails over new ones + */ + if (node->options & LOG_OPT_ENCODE) + node->options |= (node_args_list[i].mask & ~LOG_OPT_ENCODE); + else + node->options |= node_args_list[i].mask; break; } else if (flags == 2) { - node->options &= ~var_args_list[i].mask; + node->options &= ~node_args_list[i].mask; break; } } @@ -300,64 +408,71 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err } /* - * Parse a variable '%varname' or '%{args}varname' in log-format. 
The caller + * Parse an alias '%aliasname' or '%{args}aliasname' in log-format. The caller * must pass the args part in the <arg> pointer with its length in <arg_len>, - * and varname with its length in <var> and <var_len> respectively. <arg> is - * ignored when arg_len is 0. Neither <var> nor <var_len> may be null. + * and aliasname with its length in <alias> and <alias_len> respectively. <arg> + * is ignored when arg_len is 0. Neither <alias> nor <alias_len> may be null. * Returns false in error case and err is filled, otherwise returns true. */ -int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions, char **err) +static int parse_logformat_alias(char *arg, int arg_len, char *name, int name_len, int typecast, + char *alias, int alias_len, struct lf_expr *lf_expr, + int *defoptions, char **err) { int j; + struct list *list_format= &lf_expr->nodes.list; struct logformat_node *node = NULL; - for (j = 0; logformat_keywords[j].name; j++) { // search a log type - if (strlen(logformat_keywords[j].name) == var_len && - strncmp(var, logformat_keywords[j].name, var_len) == 0) { - if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) { - node = calloc(1, sizeof(*node)); - if (!node) { - memprintf(err, "out of memory error"); + for (j = 0; logformat_aliases[j].name; j++) { // search a log type + if (strlen(logformat_aliases[j].name) == alias_len && + strncmp(alias, logformat_aliases[j].name, alias_len) == 0) { + node = calloc(1, sizeof(*node)); + if (!node) { + memprintf(err, "out of memory error"); + goto error_free; + } + node->type = LOG_FMT_ALIAS; + node->alias = &logformat_aliases[j]; + node->typecast = typecast; + if (name && name_len) + node->name = my_strndup(name, name_len); + node->options = *defoptions; + if (arg_len) { + node->arg = my_strndup(arg, arg_len); + if (!parse_logformat_node_args(node->arg, node, err)) goto error_free; + } + if 
(node->alias->type == LOG_FMT_GLOBAL) { + *defoptions = node->options; + if (lf_expr->nodes.options == LOG_OPT_NONE) + lf_expr->nodes.options = node->options; + else { + /* global options were previously set and were + * overwritten for nodes that appear after the + * current one. + * + * However, for lf_expr->nodes.options we must + * keep a track of options common to ALL nodes, + * thus we take previous global options into + * account to compute the new logformat + * expression wide (global) node options. + */ + lf_expr->nodes.options &= node->options; } - node->type = logformat_keywords[j].type; - node->options = *defoptions; - if (arg_len) { - node->arg = my_strndup(arg, arg_len); - if (!parse_logformat_var_args(node->arg, node, err)) - goto error_free; - } - if (node->type == LOG_FMT_GLOBAL) { - *defoptions = node->options; - free(node->arg); - free(node); - } else { - if (logformat_keywords[j].config_callback && - logformat_keywords[j].config_callback(node, curproxy) != 0) { - goto error_free; - } - curproxy->to_log |= logformat_keywords[j].lw; - LIST_APPEND(list_format, &node->list); - } - return 1; + free_logformat_node(node); } else { - memprintf(err, "format variable '%s' is reserved for HTTP mode", - logformat_keywords[j].name); - goto error_free; + LIST_APPEND(list_format, &node->list); } + return 1; } } - j = var[var_len]; - var[var_len] = 0; - memprintf(err, "no such format variable '%s'. If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", var); - var[var_len] = j; + j = alias[alias_len]; + alias[alias_len] = 0; + memprintf(err, "no such format alias '%s'. 
If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", alias); + alias[alias_len] = j; error_free: - if (node) { - free(node->arg); - free(node); - } + free_logformat_node(node); return 0; } @@ -367,13 +482,14 @@ int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct p * start: start pointer * end: end text pointer * type: string type - * list_format: destination list + * lf_expr: destination logformat expr (list of fmt nodes) * * LOG_TEXT: copy chars from start to end excluding end. * */ -int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err) +int add_to_logformat_list(char *start, char *end, int type, struct lf_expr *lf_expr, char **err) { + struct list *list_format = &lf_expr->nodes.list; char *str; if (type == LF_TEXT) { /* type text */ @@ -401,17 +517,19 @@ int add_to_logformat_list(char *start, char *end, int type, struct list *list_fo } /* - * Parse the sample fetch expression <text> and add a node to <list_format> upon - * success. At the moment, sample converters are not yet supported but fetch arguments - * should work. The curpx->conf.args.ctx must be set by the caller. If an end pointer + * Parse the sample fetch expression <text> and add a node to <lf_expr> upon + * success. The curpx->conf.args.ctx must be set by the caller. If an end pointer * is passed in <endptr>, it will be updated with the pointer to the first character * not part of the sample expression. * * In error case, the function returns 0, otherwise it returns 1. 
*/ -int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err, char **endptr) +static int add_sample_to_logformat_list(char *text, char *name, int name_len, int typecast, + char *arg, int arg_len, struct lf_expr *lf_expr, + struct arg_list *al, int options, int cap, char **err, char **endptr) { char *cmd[2]; + struct list *list_format = &lf_expr->nodes.list; struct sample_expr *expr = NULL; struct logformat_node *node = NULL; int cmd_arg; @@ -420,8 +538,8 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox cmd[1] = ""; cmd_arg = 0; - expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err, - &curpx->conf.args, endptr); + expr = sample_parse_expr(cmd, &cmd_arg, lf_expr->conf.file, lf_expr->conf.line, err, + al, endptr); if (!expr) { memprintf(err, "failed to parse sample expression <%s> : %s", text, *err); goto error_free; @@ -429,16 +547,20 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox node = calloc(1, sizeof(*node)); if (!node) { + release_sample_expr(expr); memprintf(err, "out of memory error"); goto error_free; } + if (name && name_len) + node->name = my_strndup(name, name_len); node->type = LOG_FMT_EXPR; + node->typecast = typecast; node->expr = expr; node->options = options; if (arg_len) { node->arg = my_strndup(arg, arg_len); - if (!parse_logformat_var_args(node->arg, node, err)) + if (!parse_logformat_node_args(node->arg, node, err)) goto error_free; } if (expr->fetch->val & cap & SMP_VAL_REQUEST) @@ -455,71 +577,71 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox if ((options & LOG_OPT_HTTP) && (expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_L6RES))) { ha_warning("parsing [%s:%d] : L6 sample fetch <%s> ignored in HTTP log-format string.\n", - curpx->conf.args.file, curpx->conf.args.line, text); + lf_expr->conf.file, lf_expr->conf.line, 
text); } - /* check if we need to allocate an http_txn struct for HTTP parsing */ - /* Note, we may also need to set curpx->to_log with certain fetches */ - curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY); - - /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags - * needed with some sample fetches (eg: ssl*). We always set it for - * now on, but this will leave with sample capabilities soon. - */ - curpx->to_log |= LW_XPRT; - if (curpx->http_needed) - curpx->to_log |= LW_REQ; LIST_APPEND(list_format, &node->list); return 1; error_free: - release_sample_expr(expr); - if (node) { - free(node->arg); - free(node); - } + free_logformat_node(node); return 0; } /* - * Parse the log_format string and fill a linked list. - * Variable name are preceded by % and composed by characters [a-zA-Z0-9]* : %varname - * You can set arguments using { } : %{many arguments}varname. - * The curproxy->conf.args.ctx must be set by the caller. + * Compile logformat expression (from string to list of logformat nodes) + * + * Aliases are preceded by % and composed by characters [a-zA-Z0-9]* : %aliasname + * Expressions are preceded by % and enclosed in square brackets: %[expr] + * You can set arguments using { } : %{many arguments}aliasname + * %{many arguments}[expr] * - * fmt: the string to parse - * curproxy: the proxy affected - * list_format: the destination list + * lf_expr: the destination logformat expression (logformat_node list) + * which is supposed to be configured (str and conf set) but + * shouldn't be compiled (shouldn't contain any nodes) + * al: arg list where sample expr should store arg dependency (if the logformat + * expression involves sample expressions), may be NULL * options: LOG_OPT_* to force on every node * cap: all SMP_VAL_* flags supported by the consumer * * The function returns 1 in success case, otherwise, it returns 0 and err is filled. 
*/ -int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err) +int lf_expr_compile(struct lf_expr *lf_expr, + struct arg_list *al, int options, int cap, char **err) { + char *fmt = lf_expr->str; /* will be freed unless default */ char *sp, *str, *backfmt; /* start pointer for text parts */ char *arg = NULL; /* start pointer for args */ - char *var = NULL; /* start pointer for vars */ + char *alias = NULL; /* start pointer for aliases */ + char *name = NULL; /* token name (optional) */ + char *typecast_str = NULL; /* token output type (if custom name is set) */ int arg_len = 0; - int var_len = 0; + int alias_len = 0; + int name_len = 0; + int typecast = SMP_T_SAME; /* relaxed by default */ int cformat; /* current token format */ int pformat; /* previous token format */ - struct logformat_node *tmplf, *back; + + BUG_ON((lf_expr->flags & LF_FL_COMPILED)); + + if (!fmt) + return 1; // nothing to do sp = str = backfmt = strdup(fmt); if (!str) { memprintf(err, "out of memory error"); return 0; } - curproxy->to_log |= LW_INIT; - /* flush the list first. */ - list_for_each_entry_safe(tmplf, back, list_format, list) { - LIST_DELETE(&tmplf->list); - release_sample_expr(tmplf->expr); - free(tmplf->arg); - free(tmplf); - } + /* Prepare lf_expr nodes, past this lf_expr doesn't know about ->str + * anymore as ->str and ->nodes are part of the same union. ->str has + * been saved as local 'fmt' string pointer, so we must free it before + * returning. + */ + LIST_INIT(&lf_expr->nodes.list); + lf_expr->nodes.options = LOG_OPT_NONE; + /* we must set the compiled flag now for proper deinit in case of failure */ + lf_expr->flags |= LF_FL_COMPILED; for (cformat = LF_INIT; cformat != LF_END; str++) { pformat = cformat; @@ -533,20 +655,62 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list * We use the common LF_INIT state to dispatch to the different final states. 
*/ switch (pformat) { - case LF_STARTVAR: // text immediately following a '%' - arg = NULL; var = NULL; - arg_len = var_len = 0; + case LF_STARTALIAS: // text immediately following a '%' + arg = NULL; alias = NULL; + name = NULL; + name_len = 0; + typecast = SMP_T_SAME; + arg_len = alias_len = 0; + if (*str == '(') { // custom output name + cformat = LF_STONAME; + name = str + 1; + } + else + goto startalias; + break; + + case LF_STONAME: // text immediately following '%(' + case LF_STOTYPE: + if (cformat == LF_STONAME && *str == ':') { // start custom output type + cformat = LF_STOTYPE; + name_len = str -name; + typecast_str = str + 1; + } + else if (*str == ')') { // end of custom output name + if (cformat == LF_STONAME) + name_len = str - name; + else { + /* custom type */ + *str = 0; // so that typecast_str is 0 terminated + typecast = type_to_smp(typecast_str); + if (typecast != SMP_T_STR && typecast != SMP_T_SINT && + typecast != SMP_T_BOOL) { + memprintf(err, "unexpected output type '%.*s' at position %d line : '%s'. 
Supported types are: str, sint, bool", (int)(str - typecast_str), typecast_str, (int)(typecast_str - backfmt), fmt); + goto fail; + } + } + cformat = LF_EDONAME; + } else if (!isalnum((unsigned char)*str) && *str != '_' && *str != '-') { + memprintf(err, "invalid character in custom name near '%c' at position %d line : '%s'", + *str, (int)(str - backfmt), fmt); + + goto fail; + } + break; + + case LF_EDONAME: // text immediately following %(name) + startalias: if (*str == '{') { // optional argument cformat = LF_STARG; arg = str + 1; } else if (*str == '[') { cformat = LF_STEXPR; - var = str + 1; // store expr in variable name + alias = str + 1; // store expr in alias name } - else if (isalpha((unsigned char)*str)) { // variable name - cformat = LF_VAR; - var = str; + else if (isalpha((unsigned char)*str)) { // alias name + cformat = LF_ALIAS; + alias = str; } else if (*str == '%') cformat = LF_TEXT; // convert this character to a literal (useful for '%') @@ -555,7 +719,7 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list cformat = LF_TEXT; pformat = LF_TEXT; /* finally we include the previous char as well */ sp = str - 1; /* send both the '%' and the current char */ - memprintf(err, "unexpected variable name near '%c' at position %d line : '%s'. Maybe you want to write a single '%%', use the syntax '%%%%'", + memprintf(err, "unexpected alias name near '%c' at position %d line : '%s'. 
Maybe you want to write a single '%%', use the syntax '%%%%'", *str, (int)(str - backfmt), fmt); goto fail; @@ -575,15 +739,15 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list case LF_EDARG: // text immediately following '%{arg}' if (*str == '[') { cformat = LF_STEXPR; - var = str + 1; // store expr in variable name + alias = str + 1; // store expr in alias name break; } - else if (isalnum((unsigned char)*str)) { // variable name - cformat = LF_VAR; - var = str; + else if (isalnum((unsigned char)*str)) { // alias name + cformat = LF_ALIAS; + alias = str; break; } - memprintf(err, "parse argument modifier without variable name near '%%{%s}'", arg); + memprintf(err, "parse argument modifier without alias name near '%%{%s}'", arg); goto fail; case LF_STEXPR: // text immediately following '%[' @@ -592,7 +756,7 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list * part of the expression, which MUST be the trailing * angle bracket. 
*/ - if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &str)) + if (!add_sample_to_logformat_list(alias, name, name_len, typecast, arg, arg_len, lf_expr, al, options, cap, err, &str)) goto fail; if (*str == ']') { @@ -604,26 +768,26 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list char c = *str; *str = 0; if (isprint((unsigned char)c)) - memprintf(err, "expected ']' after '%s', but found '%c'", var, c); + memprintf(err, "expected ']' after '%s', but found '%c'", alias, c); else - memprintf(err, "missing ']' after '%s'", var); + memprintf(err, "missing ']' after '%s'", alias); goto fail; } break; - case LF_VAR: // text part of a variable name - var_len = str - var; + case LF_ALIAS: // text part of a alias name + alias_len = str - alias; if (!isalnum((unsigned char)*str)) - cformat = LF_INIT; // not variable name anymore + cformat = LF_INIT; // not alias name anymore break; default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END, LF_EDEXPR cformat = LF_INIT; } - if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */ + if (cformat == LF_INIT) { /* resynchronize state to text/sep/startalias */ switch (*str) { - case '%': cformat = LF_STARTVAR; break; + case '%': cformat = LF_STARTALIAS; break; case 0 : cformat = LF_END; break; case ' ': if (options & LOG_OPT_MERGE_SPACES) { @@ -637,13 +801,13 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list if (cformat != pformat || pformat == LF_SEPARATOR) { switch (pformat) { - case LF_VAR: - if (!parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options, err)) + case LF_ALIAS: + if (!parse_logformat_alias(arg, arg_len, name, name_len, typecast, alias, alias_len, lf_expr, &options, err)) goto fail; break; case LF_TEXT: case LF_SEPARATOR: - if (!add_to_logformat_list(sp, str, pformat, list_format, err)) + if (!add_to_logformat_list(sp, str, pformat, lf_expr, err)) goto fail; break; } @@ 
-651,18 +815,236 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list } } - if (pformat == LF_STARTVAR || pformat == LF_STARG || pformat == LF_STEXPR) { - memprintf(err, "truncated line after '%s'", var ? var : arg ? arg : "%"); + if (pformat == LF_STARTALIAS || pformat == LF_STARG || pformat == LF_STEXPR || pformat == LF_STONAME || pformat == LF_STOTYPE || pformat == LF_EDONAME) { + memprintf(err, "truncated line after '%s'", alias ? alias : arg ? arg : "%"); + goto fail; + } + logformat_str_free(&fmt); + ha_free(&backfmt); + + return 1; + fail: + logformat_str_free(&fmt); + ha_free(&backfmt); + return 0; +} + +/* lf_expr_compile() helper: uses <curproxy> to deduce settings and + * simplify function usage, mostly for legacy purpose + * + * curproxy->conf.args.ctx must be set by the caller. + * + * The logformat expression will be scheduled for postcheck on the proxy unless + * the proxy was already checked, in which case all checks will be performed right + * away. + * + * Returns 1 on success and 0 on failure. On failure: <lf_expr> will be cleaned + * up and <err> will be set. 
+ */ +int parse_logformat_string(const char *fmt, struct proxy *curproxy, + struct lf_expr *lf_expr, + int options, int cap, char **err) +{ + int ret; + + + /* reinit lf_expr (if previously set) */ + lf_expr_deinit(lf_expr); + + lf_expr->str = strdup(fmt); + if (!lf_expr->str) { + memprintf(err, "out of memory error"); + goto fail; + } + + /* Save some parsing infos to raise relevant error messages during + * postparsing if needed + */ + if (curproxy->conf.args.file) { + lf_expr->conf.file = strdup(curproxy->conf.args.file); + lf_expr->conf.line = curproxy->conf.args.line; + } + + ret = lf_expr_compile(lf_expr, &curproxy->conf.args, options, cap, err); + + if (!ret) + goto fail; + + if (!(curproxy->flags & PR_FL_CHECKED)) { + /* add the lf_expr to the proxy checks to delay postparsing + * since config-related proxy properties are not stable yet + */ + LIST_APPEND(&curproxy->conf.lf_checks, &lf_expr->list); + } + else { + /* probably called during runtime or with proxy already checked, + * perform the postcheck right away + */ + if (!lf_expr_postcheck(lf_expr, curproxy, err)) + goto fail; + } + return 1; + + fail: + lf_expr_deinit(lf_expr); + return 0; +} + +/* automatically resolves incompatible LOG_OPT options by taking into + * account current options and global options + */ +static inline void _lf_expr_postcheck_node_opt(int *options, int g_options) +{ + /* encoding is incompatible with HTTP option, so it is ignored + * if HTTP option is set, unless HTTP option wasn't set globally + * and encoding was set globally, which means encoding takes the + * precedence> + */ + if (*options & LOG_OPT_HTTP) { + if ((g_options & (LOG_OPT_HTTP | LOG_OPT_ENCODE)) == LOG_OPT_ENCODE) { + /* global encoding enabled and http enabled individually */ + *options &= ~LOG_OPT_HTTP; + } + else + *options &= ~LOG_OPT_ENCODE; + } + + if (*options & LOG_OPT_ENCODE) { + /* when encoding is set, ignore +E option */ + *options &= ~LOG_OPT_ESC; + } +} + +/* Performs LOG_OPT postparsing check 
on logformat node <node> belonging to a + * given logformat expression <lf_expr> + * + * It returns 1 on success and 0 on error, <err> will be set in case of error + */ +static int lf_expr_postcheck_node_opt(struct lf_expr *lf_expr, struct logformat_node *node, char **err) +{ + /* per-node encoding options cannot be disabled if already + * enabled globally + * + * Also, ensure we don't mix encoding types, global setting + * prevails over per-node one. + * + * Finally, ignore LOG_OPT_BIN since it is a global-only option + */ + if (lf_expr->nodes.options & LOG_OPT_ENCODE) { + node->options &= ~(LOG_OPT_BIN | LOG_OPT_ENCODE); + node->options |= (lf_expr->nodes.options & LOG_OPT_ENCODE); + } + else + node->options &= ~LOG_OPT_BIN; + + _lf_expr_postcheck_node_opt(&node->options, lf_expr->nodes.options); + + return 1; +} + +/* Performs a postparsing check on logformat expression <expr> for a given <px> + * proxy. The function will behave differently depending on the proxy state + * (during parsing we will try to adapt proxy configuration to make it + * compatible with logformat expression, but once the proxy is checked, we fail + * as soon as we face incompatibilities) + * + * It returns 1 on success and 0 on error, <err> will be set in case of error. 
+ */ +int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err) +{ + struct logformat_node *lf; + + if (!(px->flags & PR_FL_CHECKED)) + px->to_log |= LW_INIT; + + /* postcheck global node options */ + _lf_expr_postcheck_node_opt(&lf_expr->nodes.options, LOG_OPT_NONE); + + list_for_each_entry(lf, &lf_expr->nodes.list, list) { + if (lf->type == LOG_FMT_EXPR) { + struct sample_expr *expr = lf->expr; + uint8_t http_needed = !!(expr->fetch->use & SMP_USE_HTTP_ANY); + + if ((px->flags & PR_FL_CHECKED)) { + /* fail as soon as proxy properties are not compatible */ + if (http_needed && !px->http_needed) { + memprintf(err, "sample fetch '%s' requires HTTP enabled proxy which is not available here", + expr->fetch->kw); + goto fail; + } + goto next_node; + } + /* check if we need to allocate an http_txn struct for HTTP parsing */ + /* Note, we may also need to set curpx->to_log with certain fetches */ + px->http_needed |= http_needed; + + /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags + * needed with some sample fetches (eg: ssl*). We always set it for + * now on, but this will leave with sample capabilities soon. 
+ */ + px->to_log |= LW_XPRT; + if (px->http_needed) + px->to_log |= LW_REQ; + } + else if (lf->type == LOG_FMT_ALIAS) { + if (lf->alias->mode == PR_MODE_HTTP && px->mode != PR_MODE_HTTP) { + memprintf(err, "format alias '%s' is reserved for HTTP mode", + lf->alias->name); + goto fail; + } + if (lf->alias->config_callback && + !lf->alias->config_callback(lf, px)) { + memprintf(err, "cannot configure format alias '%s' in this context", + lf->alias->name); + goto fail; + } + if (!(px->flags & PR_FL_CHECKED)) + px->to_log |= lf->alias->lw; + } + next_node: + /* postcheck individual node's options */ + if (!lf_expr_postcheck_node_opt(lf_expr, lf, err)) + goto fail; + } + if ((px->to_log & (LW_REQ | LW_RESP)) && + (px->mode != PR_MODE_HTTP && !(px->options & PR_O_HTTP_UPG))) { + memprintf(err, "logformat expression not usable here (at least one node depends on HTTP mode)"); goto fail; } - free(backfmt); return 1; fail: - free(backfmt); return 0; } +/* postparse logformats defined at <px> level */ +static int postcheck_logformat_proxy(struct proxy *px) +{ + char *err = NULL; + struct lf_expr *lf_expr, *back_lf; + int err_code = ERR_NONE; + + list_for_each_entry_safe(lf_expr, back_lf, &px->conf.lf_checks, list) { + BUG_ON(!(lf_expr->flags & LF_FL_COMPILED)); + if (!lf_expr_postcheck(lf_expr, px, &err)) + err_code |= ERR_FATAL | ERR_ALERT; + /* check performed, ensure it doesn't get checked twice */ + LIST_DEL_INIT(&lf_expr->list); + if (err_code & ERR_CODE) + break; + } + + if (err) { + memprintf(&err, "error detected while postparsing logformat expression used by %s '%s' : %s", proxy_type_str(px), px->id, err); + if (lf_expr->conf.file) + memprintf(&err, "parsing [%s:%d] : %s.\n", lf_expr->conf.file, lf_expr->conf.line, err); + ha_alert("%s", err); + ha_free(&err); + } + + return err_code; +} + /* * Parse the first range of indexes from a string made of a list of comma separated * ranges of indexes. 
Note that an index may be considered as a particular range @@ -775,110 +1157,6 @@ static int dup_log_target(struct log_target *def, struct log_target *cpy) return 0; } -/* must be called under the lbprm lock */ -static void _log_backend_srv_queue(struct server *srv) -{ - struct proxy *p = srv->proxy; - - /* queue the server in the proxy lb array to make it easily searchable by - * log-balance algorithms. Here we use the srv array as a general server - * pool of in-use servers, lookup is done using a relative positional id - * (array is contiguous) - * - * We use the avail server list to get a quick hand on available servers - * (those that are UP) - */ - if (srv->flags & SRV_F_BACKUP) { - if (!p->srv_act) - p->lbprm.log.srv[p->srv_bck] = srv; - p->srv_bck++; - } - else { - if (!p->srv_act) { - /* we will be switching to act tree in LB logic, thus we need to - * reset the lastid - */ - HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0); - } - p->lbprm.log.srv[p->srv_act] = srv; - p->srv_act++; - } - /* append the server to the list of available servers */ - LIST_APPEND(&p->lbprm.log.avail, &srv->lb_list); - - p->lbprm.tot_weight = (p->srv_act) ? 
p->srv_act : p->srv_bck; -} - -static void log_backend_srv_up(struct server *srv) -{ - struct proxy *p __maybe_unused = srv->proxy; - - if (!srv_lb_status_changed(srv)) - return; /* nothing to do */ - if (srv_currently_usable(srv) || !srv_willbe_usable(srv)) - return; /* false alarm */ - - HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); - _log_backend_srv_queue(srv); - HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); -} - -/* must be called under lbprm lock */ -static void _log_backend_srv_recalc(struct proxy *p) -{ - unsigned int it = 0; - struct server *cur_srv; - - list_for_each_entry(cur_srv, &p->lbprm.log.avail, lb_list) { - uint8_t backup = cur_srv->flags & SRV_F_BACKUP; - - if ((!p->srv_act && backup) || - (p->srv_act && !backup)) - p->lbprm.log.srv[it++] = cur_srv; - } -} - -/* must be called under the lbprm lock */ -static void _log_backend_srv_dequeue(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->flags & SRV_F_BACKUP) { - p->srv_bck--; - } - else { - p->srv_act--; - if (!p->srv_act) { - /* we will be switching to bck tree in LB logic, thus we need to - * reset the lastid - */ - HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0); - } - } - - /* remove the srv from the list of available (UP) servers */ - LIST_DELETE(&srv->lb_list); - - /* reconstruct the array of usable servers */ - _log_backend_srv_recalc(p); - - p->lbprm.tot_weight = (p->srv_act) ? 
p->srv_act : p->srv_bck; -} - -static void log_backend_srv_down(struct server *srv) -{ - struct proxy *p __maybe_unused = srv->proxy; - - if (!srv_lb_status_changed(srv)) - return; /* nothing to do */ - if (!srv_currently_usable(srv) || srv_willbe_usable(srv)) - return; /* false alarm */ - - HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); - _log_backend_srv_dequeue(srv); - HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); -} - /* check that current configuration is compatible with "mode log" */ static int _postcheck_log_backend_compat(struct proxy *be) { @@ -943,8 +1221,11 @@ static int _postcheck_log_backend_compat(struct proxy *be) } if (balance_algo != BE_LB_ALGO_RR && balance_algo != BE_LB_ALGO_RND && - balance_algo != BE_LB_ALGO_LS && + balance_algo != BE_LB_ALGO_SS && balance_algo != BE_LB_ALGO_LH) { + /* cannot correct the error since lbprm init was already performed + * in cfgparse.c, so fail loudly + */ ha_alert("in %s '%s': \"balance\" only supports 'roundrobin', 'random', 'sticky' and 'log-hash'.\n", proxy_type_str(be), be->id); err_code |= ERR_ALERT | ERR_FATAL; } @@ -966,30 +1247,6 @@ static int postcheck_log_backend(struct proxy *be) if (err_code & ERR_CODE) return err_code; - /* First time encountering this log backend, perform some init - */ - be->lbprm.set_server_status_up = log_backend_srv_up; - be->lbprm.set_server_status_down = log_backend_srv_down; - be->lbprm.log.lastid = 0; /* initial value */ - LIST_INIT(&be->lbprm.log.avail); - - /* alloc srv array (it will be used for active and backup server lists in turn, - * so we ensure that the longest list will fit - */ - be->lbprm.log.srv = calloc(MAX(be->srv_act, be->srv_bck), - sizeof(*be->lbprm.log.srv)); - - if (!be->lbprm.log.srv ) { - memprintf(&msg, "memory error when allocating server array (%d entries)", - MAX(be->srv_act, be->srv_bck)); - err_code |= ERR_ALERT | ERR_FATAL; - goto end; - } - - /* reinit srv counters, lbprm queueing will recount */ - be->srv_act = 0; - be->srv_bck = 0; - /* 
"log-balance hash" needs to compile its expression */ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) { struct sample_expr *expr; @@ -1100,13 +1357,10 @@ static int postcheck_log_backend(struct proxy *be) goto end; } srv->log_target->flags |= LOG_TARGET_FL_RESOLVED; - srv->cur_eweight = 1; /* ignore weights, all servers have the same weight */ - _log_backend_srv_queue(srv); srv = srv->next; } end: if (err_code & ERR_CODE) { - ha_free(&be->lbprm.log.srv); /* free log servers array */ ha_alert("log backend '%s': failed to initialize: %s.\n", be->id, msg); ha_free(&msg); } @@ -1171,6 +1425,7 @@ struct logger *dup_logger(struct logger *def) /* default values */ cpy->conf.file = NULL; + cpy->lb.smp_rgs = NULL; LIST_INIT(&cpy->list); /* special members */ @@ -1181,6 +1436,13 @@ struct logger *dup_logger(struct logger *def) if (!cpy->conf.file) goto error; } + if (def->lb.smp_rgs) { + cpy->lb.smp_rgs = malloc(sizeof(*cpy->lb.smp_rgs) * def->lb.smp_rgs_sz); + if (!cpy->lb.smp_rgs) + goto error; + memcpy(cpy->lb.smp_rgs, def->lb.smp_rgs, + sizeof(*cpy->lb.smp_rgs) * def->lb.smp_rgs_sz); + } /* inherit from original reference if set */ cpy->ref = (def->ref) ? def->ref : def; @@ -1204,6 +1466,7 @@ void free_logger(struct logger *logger) BUG_ON(LIST_INLIST(&logger->list)); ha_free(&logger->conf.file); deinit_log_target(&logger->target); + free(logger->lb.smp_rgs); free(logger); } @@ -1564,127 +1827,440 @@ int get_log_facility(const char *fac) return facility; } -/* - * Encode the string. 
+struct lf_buildctx { + char _buf[256];/* fixed size buffer for building small strings */ + int options; /* LOG_OPT_* options */ + int typecast; /* same as logformat_node->typecast */ + int in_text; /* inside variable-length text */ + union { + struct cbor_encode_ctx cbor; /* cbor-encode specific ctx */ + } encode; +}; + +static THREAD_LOCAL struct lf_buildctx lf_buildctx; + +/* helper to encode a single byte in hex form * - * When using the +E log format option, it will try to escape '"\]' - * characters with '\' as prefix. The same prefix should not be used as - * <escape>. + * Returns the position of the last written byte on success and NULL on + * error. */ -static char *lf_encode_string(char *start, char *stop, - const char escape, const long *map, - const char *string, - struct logformat_node *node) +static char *_encode_byte_hex(char *start, char *stop, unsigned char byte) { - if (node->options & LOG_OPT_ESC) { - if (start < stop) { - stop--; /* reserve one byte for the final '\0' */ - while (start < stop && *string != '\0') { - if (!ha_bit_test((unsigned char)(*string), map)) { - if (!ha_bit_test((unsigned char)(*string), rfc5424_escape_map)) - *start++ = *string; - else { - if (start + 2 >= stop) - break; - *start++ = '\\'; - *start++ = *string; - } - } - else { - if (start + 3 >= stop) - break; - *start++ = escape; - *start++ = hextab[(*string >> 4) & 15]; - *start++ = hextab[*string & 15]; - } - string++; - } - *start = '\0'; - } + /* hex form requires 2 bytes */ + if ((stop - start) < 2) + return NULL; + *start++ = hextab[(byte >> 4) & 15]; + *start++ = hextab[byte & 15]; + return start; +} + +/* lf cbor function ptr used to encode a single byte according to RFC8949 + * + * for now only hex form is supported. + * + * The function may only be called under CBOR context (that is when + * LOG_OPT_ENCODE_CBOR option is set). + * + * Returns the position of the last written byte on success and NULL on + * error. 
+ */ +static char *_lf_cbor_encode_byte(struct cbor_encode_ctx *cbor_ctx, + char *start, char *stop, unsigned char byte) +{ + struct lf_buildctx *ctx; + + BUG_ON(!cbor_ctx || !cbor_ctx->e_fct_ctx); + ctx = cbor_ctx->e_fct_ctx; + + if (ctx->options & LOG_OPT_BIN) { + /* raw output */ + if ((stop - start) < 1) + return NULL; + *start++ = byte; + return start; + } + return _encode_byte_hex(start, stop, byte); +} + +/* helper function to prepare lf_buildctx struct based on global options + * and current node settings (may be NULL) + */ +static inline void lf_buildctx_prepare(struct lf_buildctx *ctx, + int g_options, + const struct logformat_node *node) +{ + if (node) { + /* consider node's options and typecast setting */ + ctx->options = node->options; + ctx->typecast = node->typecast; } else { - return encode_string(start, stop, escape, map, string); + ctx->options = g_options; + ctx->typecast = SMP_T_SAME; /* default */ + } + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* prepare cbor-specific encode ctx */ + ctx->encode.cbor.e_fct_byte = _lf_cbor_encode_byte; + ctx->encode.cbor.e_fct_ctx = ctx; + } +} + +/* helper function for _lf_encode_bytes() to escape a single byte + * with <escape> + */ +static inline char *_lf_escape_byte(char *start, char *stop, + char byte, const char escape) +{ + if (start + 3 >= stop) + return NULL; + *start++ = escape; + *start++ = hextab[(byte >> 4) & 15]; + *start++ = hextab[byte & 15]; + + return start; +} + +/* helper function for _lf_encode_bytes() to escape a single byte + * with <escape> and deal with cbor-specific encoding logic + */ +static inline char *_lf_cbor_escape_byte(char *start, char *stop, + char byte, const char escape, + uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + char escaped_byte[3]; + + escaped_byte[0] = escape; + escaped_byte[1] = hextab[(byte >> 4) & 15]; + escaped_byte[2] = hextab[byte & 15]; + + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + escaped_byte, 3, + 
cbor_string_prefix); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_map_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) + *start++ = *byte; + else + start = _lf_escape_byte(start, stop, *byte, escape); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> and deal with + * cbor-specific encoding logic. + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_cbor_map_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + /* We try our best to minimize the number of chunks produced for the + * indefinite-length byte string as each chunk has an extra overhead + * as per RFC8949. 
+ * + * To achieve that, we try to emit consecutive bytes together + */ + if (!ha_bit_test((unsigned char)(*byte), map)) { + /* do nothing and let the caller continue seeking data, + * pending data will be flushed later + */ + } else { + /* first, flush pending unescaped bytes */ + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + *pending, (byte - *pending), + cbor_string_prefix); + if (start == NULL) + return NULL; + + *pending = byte + 1; + + /* escape current matching byte */ + start = _lf_cbor_escape_byte(start, stop, *byte, escape, + cbor_string_prefix, + ctx); + } + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> or escape it with + * '\' if found in rfc5424_escape_map + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_rfc5424_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) { + if (!ha_bit_test((unsigned char)(*byte), rfc5424_escape_map)) + *start++ = *byte; + else { + if (start + 2 >= stop) + return NULL; + *start++ = '\\'; + *start++ = *byte; + } + } + else + start = _lf_escape_byte(start, stop, *byte, escape); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> or escape it with + * '\' if found in json_escape_map + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_json_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + 
struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) { + if (!ha_bit_test((unsigned char)(*byte), json_escape_map)) + *start++ = *byte; + else { + if (start + 2 >= stop) + return NULL; + *start++ = '\\'; + *start++ = *byte; + } } + else + start = _lf_escape_byte(start, stop, *byte, escape); return start; } /* - * Encode the chunk. + * helper for lf_encode_{string,chunk}: + * encode the input bytes, input <bytes> is processed until <bytes_stop> + * is reached. If <bytes_stop> is NULL, <bytes> is expected to be NULL + * terminated. * * When using the +E log format option, it will try to escape '"\]' * characters with '\' as prefix. The same prefix should not be used as * <escape>. + * + * When using json encoding, string will be escaped according to + * json escape map + * + * When using cbor encoding, escape option is ignored. However bytes found + * in <map> will still be escaped with <escape>. + * + * Return the address of the \0 character, or NULL on error */ -static char *lf_encode_chunk(char *start, char *stop, - const char escape, const long *map, - const struct buffer *chunk, - struct logformat_node *node) +static char *_lf_encode_bytes(char *start, char *stop, + const char escape, const long *map, + const char *bytes, const char *bytes_stop, + struct lf_buildctx *ctx) { - char *str, *end; - - if (node->options & LOG_OPT_ESC) { - if (start < stop) { - str = chunk->area; - end = chunk->area + chunk->data; - - stop--; /* reserve one byte for the final '\0' */ - while (start < stop && str < end) { - if (!ha_bit_test((unsigned char)(*str), map)) { - if (!ha_bit_test((unsigned char)(*str), rfc5424_escape_map)) - *start++ = *str; - else { - if (start + 2 >= stop) - break; - *start++ = '\\'; - *start++ = *str; - } - } - else { - if (start + 3 >= stop) - break; - *start++ = escape; - *start++ = hextab[(*str >> 4) & 15]; - *start++ = hextab[*str & 15]; - } - str++; - } - *start = '\0'; + char *ret; + const char *pending; + uint8_t 
cbor_string_prefix = 0; + char *(*encode_byte)(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx); + + if (ctx->options & LOG_OPT_ENCODE_JSON) + encode_byte = _lf_json_escape_byte; + else if (ctx->options & LOG_OPT_ENCODE_CBOR) + encode_byte = _lf_cbor_map_escape_byte; + else if (ctx->options & LOG_OPT_ESC) + encode_byte = _lf_rfc5424_escape_byte; + else + encode_byte = _lf_map_escape_byte; + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (!bytes_stop) { + /* printable chars: use cbor text */ + cbor_string_prefix = 0x60; + } + else { + /* non printable chars: use cbor byte string */ + cbor_string_prefix = 0x40; } } - else { - return encode_chunk(start, stop, escape, map, chunk); + + if (start < stop) { + stop--; /* reserve one byte for the final '\0' */ + + if ((ctx->options & LOG_OPT_ENCODE_CBOR) && !ctx->in_text) { + /* start indefinite-length cbor byte string or text */ + start = _lf_cbor_encode_byte(&ctx->encode.cbor, start, stop, + (cbor_string_prefix | 0x1F)); + if (start == NULL) + return NULL; + } + pending = bytes; + + /* we have 2 distinct loops to keep checks outside of the loop + * for better performance + */ + if (bytes && !bytes_stop) { + while (start < stop && *bytes != '\0') { + ret = encode_byte(start, stop, bytes, escape, map, + &pending, cbor_string_prefix, + ctx); + if (ret == NULL) + break; + start = ret; + bytes++; + } + } else if (bytes) { + while (start < stop && bytes < bytes_stop) { + ret = encode_byte(start, stop, bytes, escape, map, + &pending, cbor_string_prefix, + ctx); + if (ret == NULL) + break; + start = ret; + bytes++; + } + } + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (pending != bytes) { + /* flush pending unescaped bytes */ + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + pending, (bytes - pending), + cbor_string_prefix); + if (start == NULL) + return NULL; + } + if (!ctx->in_text) { + /* 
cbor break (to end indefinite-length text or byte string) */ + start = _lf_cbor_encode_byte(&ctx->encode.cbor, start, stop, 0xFF); + if (start == NULL) + return NULL; + } + } + + *start = '\0'; + return start; } - return start; + return NULL; } /* - * Write a string in the log string - * Take cares of quote and escape options + * Encode the string. + */ +static char *lf_encode_string(char *start, char *stop, + const char escape, const long *map, + const char *string, + struct lf_buildctx *ctx) +{ + return _lf_encode_bytes(start, stop, escape, map, + string, NULL, ctx); +} + +/* + * Encode the chunk. + */ +static char *lf_encode_chunk(char *start, char *stop, + const char escape, const long *map, + const struct buffer *chunk, + struct lf_buildctx *ctx) +{ + return _lf_encode_bytes(start, stop, escape, map, + chunk->area, chunk->area + chunk->data, + ctx); +} + +/* + * Write a raw string in the log string + * Take care of escape option + * + * When using json encoding, string will be escaped according + * to json escape map + * + * When using cbor encoding, escape option is ignored. 
* * Return the address of the \0 character, or NULL on error */ -char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const struct logformat_node *node) +static inline char *_lf_text_len(char *dst, const char *src, + size_t len, size_t size, struct lf_buildctx *ctx) { - if (size < 2) - return NULL; + const long *escape_map = NULL; + char *ret; - if (node->options & LOG_OPT_QUOTE) { - *(dst++) = '"'; - size--; - } + if (ctx->options & LOG_OPT_ENCODE_JSON) + escape_map = json_escape_map; + else if (ctx->options & LOG_OPT_ESC) + escape_map = rfc5424_escape_map; if (src && len) { + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* it's actually less costly to compute the actual text size to + * write a single fixed length text at once rather than emitting + * indefinite length text in cbor, because indefinite-length text + * has to be made of multiple chunks of known size as per RFC8949... + */ + { + int _len; + + /* strnlen(src, len) portable equivalent: */ + for (_len = 0; _len < len && src[_len]; _len++) + ; + + len = _len; + } + + ret = cbor_encode_text(&ctx->encode.cbor, dst, dst + size, src, len); + if (ret == NULL) + return NULL; + len = ret - dst; + } + /* escape_string and strlcpy2 will both try to add terminating NULL-byte - * to dst, so we need to make sure that extra byte will fit into dst - * before calling them + * to dst */ - if (node->options & LOG_OPT_ESC) { + else if (escape_map) { char *ret; - ret = escape_string(dst, (dst + size - 1), '\\', rfc5424_escape_map, src, src + len); - if (ret == NULL || *ret != '\0') + ret = escape_string(dst, dst + size, '\\', escape_map, src, src + len); + if (ret == NULL) return NULL; len = ret - dst; } @@ -1692,90 +2268,276 @@ char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const str if (++len > size) len = size; len = strlcpy2(dst, src, len); + if (len == 0) + return NULL; } - - size -= len; dst += len; + size -= len; } - else if ((node->options & 
(LOG_OPT_QUOTE|LOG_OPT_MANDATORY)) == LOG_OPT_MANDATORY) { - if (size < 2) - return NULL; - *(dst++) = '-'; - size -= 1; - } - if (node->options & LOG_OPT_QUOTE) { - if (size < 2) + if (size < 1) + return NULL; + *dst = '\0'; + + return dst; +} + +/* + * Quote a string, then leverage _lf_text_len() to write it + */ +static inline char *_lf_quotetext_len(char *dst, const char *src, + size_t len, size_t size, struct lf_buildctx *ctx) +{ + if (size < 2) + return NULL; + + *(dst++) = '"'; + size--; + + if (src && len) { + char *ret; + + ret = _lf_text_len(dst, src, len, size, ctx); + if (ret == NULL) return NULL; - *(dst++) = '"'; + size -= (ret - dst); + dst += (ret - dst); } + if (size < 2) + return NULL; + *(dst++) = '"'; + + *dst = '\0'; + return dst; +} + +/* + * Write a string in the log string + * Take care of quote, mandatory and escape and encoding options + * + * Return the address of the \0 character, or NULL on error + */ +static char *lf_text_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx) +{ + if ((ctx->options & (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON))) + return _lf_quotetext_len(dst, src, len, size, ctx); + else if ((ctx->options & LOG_OPT_ENCODE_CBOR) || + (src && len)) + return _lf_text_len(dst, src, len, size, ctx); + + if (size < 2) + return NULL; + + if ((ctx->options & LOG_OPT_MANDATORY)) + return _lf_text_len(dst, "-", 1, size, ctx); + *dst = '\0'; + return dst; } -static inline char *lf_text(char *dst, const char *src, size_t size, const struct logformat_node *node) +/* + * Same as lf_text_len() except that it ignores mandatory and quoting options. + * Quoting is only performed when strictly required by the encoding method. 
+ */ +static char *lf_rawtext_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx) +{ + if (!ctx->in_text && + (ctx->options & LOG_OPT_ENCODE_JSON)) + return _lf_quotetext_len(dst, src, len, size, ctx); + return _lf_text_len(dst, src, len, size, ctx); +} + +/* lf_text_len() helper when <src> is null-byte terminated */ +static inline char *lf_text(char *dst, const char *src, size_t size, struct lf_buildctx *ctx) { - return lf_text_len(dst, src, size, size, node); + return lf_text_len(dst, src, size, size, ctx); +} + +/* lf_rawtext_len() helper when <src> is null-byte terminated */ +static inline char *lf_rawtext(char *dst, const char *src, size_t size, struct lf_buildctx *ctx) +{ + return lf_rawtext_len(dst, src, size, size, ctx); } /* * Write a IP address to the log string * +X option write in hexadecimal notation, most significant byte on the left */ -char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node) +static char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, struct lf_buildctx *ctx) { char *ret = dst; int iret; char pn[INET6_ADDRSTRLEN]; - if (node->options & LOG_OPT_HEXA) { + if (ctx->options & LOG_OPT_HEXA) { unsigned char *addr = NULL; switch (sockaddr->sa_family) { case AF_INET: + { addr = (unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_addr.s_addr; - iret = snprintf(dst, size, "%02X%02X%02X%02X", addr[0], addr[1], addr[2], addr[3]); + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X%02X%02X%02X", + addr[0], addr[1], addr[2], addr[3]); + if (iret < 0 || iret >= size) + return NULL; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); + break; + } case AF_INET6: + { addr = (unsigned char *)&((struct sockaddr_in6 *)sockaddr)->sin6_addr.s6_addr; - iret = snprintf(dst, size, "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7], - addr[8], addr[9], addr[10], addr[11], 
addr[12], addr[13], addr[14], addr[15]); + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), + "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", + addr[0], addr[1], addr[2], addr[3], + addr[4], addr[5], addr[6], addr[7], + addr[8], addr[9], addr[10], addr[11], + addr[12], addr[13], addr[14], addr[15]); + if (iret < 0 || iret >= size) + return NULL; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); + break; + } default: return NULL; } - if (iret < 0 || iret > size) - return NULL; - ret += iret; } else { addr_to_str((struct sockaddr_storage *)sockaddr, pn, sizeof(pn)); - ret = lf_text(dst, pn, size, node); - if (ret == NULL) - return NULL; + ret = lf_text(dst, pn, size, ctx); } return ret; } +/* Logformat expr wrapper to write a boolean according to node + * encoding settings + */ +static char *lf_bool_encode(char *dst, size_t size, uint8_t value, + struct lf_buildctx *ctx) +{ + /* encode as a regular bool value */ + + if (ctx->options & LOG_OPT_ENCODE_JSON) { + char *ret = dst; + int iret; + + if (value) + iret = snprintf(dst, size, "true"); + else + iret = snprintf(dst, size, "false"); + + if (iret < 0 || iret >= size) + return NULL; + ret += iret; + return ret; + } + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (value) + return _lf_cbor_encode_byte(&ctx->encode.cbor, dst, dst + size, 0xF5); + return _lf_cbor_encode_byte(&ctx->encode.cbor, dst, dst + size, 0xF4); + } + + return NULL; /* not supported */ +} + +/* Logformat expr wrapper to write an integer according to node + * encoding settings and typecast settings. 
+ */ +static char *lf_int_encode(char *dst, size_t size, int64_t value, + struct lf_buildctx *ctx) +{ + if (ctx->typecast == SMP_T_BOOL) { + /* either true or false */ + return lf_bool_encode(dst, size, !!value, ctx); + } + + if (ctx->options & LOG_OPT_ENCODE_JSON) { + char *ret = dst; + int iret = 0; + + if (ctx->typecast == SMP_T_STR) { + /* encode as a string number (base10 with "quotes"): + * may be useful to work around the limited resolution + * of JS number types for instance + */ + iret = snprintf(dst, size, "\"%lld\"", (long long int)value); + } + else { + /* encode as a regular int64 number (base10) */ + iret = snprintf(dst, size, "%lld", (long long int)value); + } + + if (iret < 0 || iret >= size) + return NULL; + ret += iret; + + return ret; + } + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* Always print as a regular int64 number (STR typecast isn't + * supported) + */ + return cbor_encode_int64(&ctx->encode.cbor, dst, dst + size, value); + } + + return NULL; /* not supported */ +} + +enum lf_int_hdl { + LF_INT_LTOA = 0, + LF_INT_LLTOA, + LF_INT_ULTOA, + LF_INT_UTOA_PAD_4, +}; + +/* + * Logformat expr wrapper to write an integer, uses <dft_hdl> to know + * how to encode the value by default (if no encoding is used) + */ +static inline char *lf_int(char *dst, size_t size, int64_t value, + struct lf_buildctx *ctx, + enum lf_int_hdl dft_hdl) +{ + if (ctx->options & LOG_OPT_ENCODE) + return lf_int_encode(dst, size, value, ctx); + + switch (dft_hdl) { + case LF_INT_LTOA: + return ltoa_o(value, dst, size); + case LF_INT_LLTOA: + return lltoa(value, dst, size); + case LF_INT_ULTOA: + return ultoa_o(value, dst, size); + case LF_INT_UTOA_PAD_4: + { + if (size < 4) + return NULL; + return utoa_pad(value, dst, 4); + } + } + return NULL; +} + /* * Write a port to the log * +X option write in hexadecimal notation, most significant byte on the left */ -char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node) 
+static char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, struct lf_buildctx *ctx) { char *ret = dst; int iret; - if (node->options & LOG_OPT_HEXA) { + if (ctx->options & LOG_OPT_HEXA) { const unsigned char *port = (const unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_port; - iret = snprintf(dst, size, "%02X%02X", port[0], port[1]); - if (iret < 0 || iret > size) + + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X%02X", port[0], port[1]); + if (iret < 0 || iret >= size) return NULL; - ret += iret; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); } else { - ret = ltoa_o(get_host_port((struct sockaddr_storage *)sockaddr), dst, size); - if (ret == NULL) - return NULL; + ret = lf_int(dst, size, get_host_port((struct sockaddr_storage *)sockaddr), + ctx, LF_INT_LTOA); } return ret; } @@ -2255,51 +3017,25 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr int nblogger, size_t maxlen, char *message, size_t size) { - struct server *srv; - uint32_t targetid = ~0; /* default value to check if it was explicitly assigned */ - uint32_t nb_srv; - - HA_RWLOCK_RDLOCK(LBPRM_LOCK, &be->lbprm.lock); - - if (be->srv_act) { - nb_srv = be->srv_act; - } - else if (be->srv_bck) { - /* no more active servers but backup ones are, switch to backup farm */ - nb_srv = be->srv_bck; - if (!(be->options & PR_O_USE_ALL_BK)) { - /* log balancing disabled on backup farm */ - targetid = 0; /* use first server */ - goto skip_lb; - } - } - else { - /* no srv available, can't log */ - goto drop; - } + struct server *srv = NULL; /* log-balancing logic: */ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RR) { - /* Atomically load and update lastid since it's not protected - * by any write lock - * - * Wrapping is expected and could lead to unexpected ID reset in the - * middle of a cycle, but given that this only happens once in every - * 4 billions it is quite negligible - */ - targetid = HA_ATOMIC_FETCH_ADD(&be->lbprm.log.lastid, 1) % 
nb_srv; + srv = fwrr_get_next_server(be, NULL); } - else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LS) { + else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SS) { /* sticky mode: use first server in the pool, which will always stay * first during dequeuing and requeuing, unless it becomes unavailable * and will be replaced by another one */ - targetid = 0; + srv = ss_get_server(be); } else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RND) { - /* random mode */ - targetid = statistical_prng() % nb_srv; + unsigned int hash; + + hash = statistical_prng(); /* random */ + srv = chash_get_server_hash(be, hash, NULL); } else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) { struct sample result; @@ -2314,28 +3050,24 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr if (sample_process_cnv(be->lbprm.expr, &result)) { /* gen_hash takes binary input, ensure that we provide such value to it */ if (result.data.type == SMP_T_BIN || sample_casts[result.data.type][SMP_T_BIN]) { + unsigned int hash; + sample_casts[result.data.type][SMP_T_BIN](&result); - targetid = gen_hash(be, result.data.u.str.area, result.data.u.str.data) % nb_srv; + hash = gen_hash(be, result.data.u.str.area, result.data.u.str.data); + srv = map_get_server_hash(be, hash); } } } - skip_lb: - - if (targetid == ~0) { - /* no target assigned, nothing to do */ + if (!srv) { + /* no srv available, can't log */ goto drop; } - /* find server based on targetid */ - srv = be->lbprm.log.srv[targetid]; - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock); - __do_send_log(srv->log_target, hdr, nblogger, maxlen, message, size); return; drop: - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock); _HA_ATOMIC_INC(&dropped_logs); } @@ -2347,7 +3079,7 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr * data to build the header. 
*/ void process_send_log(struct list *loggers, int level, int facility, - struct ist *metadata, char *message, size_t size) + struct ist *metadata, char *message, size_t size) { struct logger *logger; int nblogger; @@ -2463,16 +3195,131 @@ const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found a Set-cookie Updated, unknown, unknown */ /* + * try to write a cbor byte if there is enough space, or goto out + */ +#define LOG_CBOR_BYTE(x) do { \ + ret = _lf_cbor_encode_byte(&ctx->encode.cbor, \ + tmplog, \ + dst + maxsize, \ + (x)); \ + if (ret == NULL) \ + goto out; \ + tmplog = ret; \ + } while (0) + +/* * try to write a character if there is enough space, or goto out */ #define LOGCHAR(x) do { \ - if (tmplog < dst + maxsize - 1) { \ - *(tmplog++) = (x); \ - } else { \ - goto out; \ - } \ + if ((ctx->options & LOG_OPT_ENCODE_CBOR) && \ + ctx->in_text) { \ + char _x[1]; \ + /* encode the char as text chunk since we \ + * cannot just throw random bytes and expect \ + * cbor decoder to know how to handle them \ + */ \ + _x[0] = (x); \ + ret = cbor_encode_text(&ctx->encode.cbor, \ + tmplog, \ + dst + maxsize, \ + _x, sizeof(_x)); \ + if (ret == NULL) \ + goto out; \ + tmplog = ret; \ + break; \ + } \ + if (tmplog < dst + maxsize - 1) { \ + *(tmplog++) = (x); \ + } else { \ + goto out; \ + } \ } while(0) +/* indicate that a new variable-length text is starting, sets in_text + * variable to indicate that a var text was started and deals with + * encoding and options to know if some special treatment is needed. 
+ */ +#define LOG_VARTEXT_START() do { \ + ctx->in_text = 1; \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* start indefinite-length cbor text */ \ + LOG_CBOR_BYTE(0x7F); \ + break; \ + } \ + /* put the text within quotes if JSON encoding \ + * is used or quoting is enabled \ + */ \ + if (ctx->options & \ + (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \ + LOGCHAR('"'); \ + } \ + } while (0) + +/* properly finish a variable text that was started using LOG_VARTEXT_START + * checks the in_text variable to know if a text was started or not, and + * deals with encoding and options to know if some special treatment is + * needed. + */ +#define LOG_VARTEXT_END() do { \ + if (!ctx->in_text) \ + break; \ + ctx->in_text = 0; \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* end indefinite-length cbor text with break*/\ + LOG_CBOR_BYTE(0xFF); \ + break; \ + } \ + /* add the ending quote if JSON encoding is \ + * used or quoting is enabled \ + */ \ + if (ctx->options & \ + (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \ + LOGCHAR('"'); \ + } \ + } while (0) + +/* Prints additional logvalue hint represented by <chr>. + * It is useful to express that <chr> is not part of the "raw" value and + * should be considered as optional metadata instead. 
+ */ +#define LOGMETACHAR(chr) do { \ + /* ignored when encoding is used */ \ + if (ctx->options & LOG_OPT_ENCODE) \ + break; \ + LOGCHAR(chr); \ + } while (0) + +/* indicate the start of a string array */ +#define LOG_STRARRAY_START() do { \ + if (ctx->options & LOG_OPT_ENCODE_JSON) \ + LOGCHAR('['); \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* start indefinite-length array */ \ + LOG_CBOR_BYTE(0x9F); \ + } \ + } while (0) + +/* indicate that a new element is added to the string array */ +#define LOG_STRARRAY_NEXT() do { \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) \ + break; \ + if (ctx->options & LOG_OPT_ENCODE_JSON) { \ + LOGCHAR(','); \ + LOGCHAR(' '); \ + } \ + else \ + LOGCHAR(' '); \ + } while (0) + +/* indicate the end of a string array */ +#define LOG_STRARRAY_END() do { \ + if (ctx->options & LOG_OPT_ENCODE_JSON) \ + LOGCHAR(']'); \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* cbor break */ \ + LOG_CBOR_BYTE(0xFF); \ + } \ + } while (0) /* Initializes some log data at boot */ static void init_log() @@ -2480,6 +3327,9 @@ static void init_log() char *tmp; int i; + /* Initialize the no escape map, which may be used to bypass escaping */ + memset(no_escape_map, 0, sizeof(no_escape_map)); + /* Initialize the escape map for the RFC5424 structured-data : '"\]' * inside PARAM-VALUE should be escaped with '\' as prefix. * See https://tools.ietf.org/html/rfc5424#section-6.3.3 for more @@ -2493,6 +3343,15 @@ static void init_log() tmp++; } + /* Initialize the escape map for JSON strings : '"\' */ + memset(json_escape_map, 0, sizeof(json_escape_map)); + + tmp = "\"\\"; + while (*tmp) { + ha_bit_set(*tmp, json_escape_map); + tmp++; + } + /* initialize the log header encoding map : '{|}"#' should be encoded with * '#' as prefix, as well as non-printable characters ( <32 or >= 127 ). 
* URL encoding only requires '"', '#' to be encoded as well as non- @@ -2583,19 +3442,133 @@ void deinit_log_forward() } } -/* Builds a log line in <dst> based on <list_format>, and stops before reaching +/* Releases memory for a single log-format node */ +void free_logformat_node(struct logformat_node *node) +{ + if (!node) + return; + + release_sample_expr(node->expr); + node->expr = NULL; + ha_free(&node->name); + ha_free(&node->arg); + ha_free(&node); +} + +/* Releases memory allocated for a log-format string */ +void free_logformat_list(struct list *fmt) +{ + struct logformat_node *lf, *lfb; + + if ((fmt == NULL) || LIST_ISEMPTY(fmt)) + return; + + list_for_each_entry_safe(lf, lfb, fmt, list) { + LIST_DELETE(&lf->list); + free_logformat_node(lf); + } +} + +/* Prepares log-format expression struct */ +void lf_expr_init(struct lf_expr *expr) +{ + LIST_INIT(&expr->list); + expr->flags = LF_FL_NONE; + expr->str = NULL; + expr->conf.file = NULL; + expr->conf.line = 0; +} + +/* Releases and resets a log-format expression */ +void lf_expr_deinit(struct lf_expr *expr) +{ + if ((expr->flags & LF_FL_COMPILED)) + free_logformat_list(&expr->nodes.list); + else + logformat_str_free(&expr->str); + free(expr->conf.file); + /* remove from parent list (if any) */ + LIST_DEL_INIT(&expr->list); + + lf_expr_init(expr); +} + +/* Transfer a compiled log-format expression from <src> to <dst> + * at the end of the operation, <src> is reset + */ +void lf_expr_xfer(struct lf_expr *src, struct lf_expr *dst) +{ + struct logformat_node *lf, *lfb; + + /* first, reset any existing expr */ + lf_expr_deinit(dst); + + BUG_ON(!(src->flags & LF_FL_COMPILED)); + + /* then proceed with transfer between <src> and <dst> */ + dst->conf.file = src->conf.file; + dst->conf.line = src->conf.line; + + dst->flags |= LF_FL_COMPILED; + LIST_INIT(&dst->nodes.list); + + list_for_each_entry_safe(lf, lfb, &src->nodes.list, list) { + LIST_DELETE(&lf->list); + LIST_APPEND(&dst->nodes.list, &lf->list); + } + + /* 
replace <src> with <dst> in <src>'s list by first adding + * <dst> after <src>, then removing <src>... + */ + LIST_INSERT(&src->list, &dst->list); + LIST_DEL_INIT(&src->list); + + /* src is now empty, perform an explicit reset */ + lf_expr_init(src); +} + +/* tries to duplicate an uncompiled logformat expression from <orig> to <dest> + * + * Returns 1 on success and 0 on failure. + */ +int lf_expr_dup(const struct lf_expr *orig, struct lf_expr *dest) +{ + BUG_ON((orig->flags & LF_FL_COMPILED)); + lf_expr_deinit(dest); + if (orig->str) { + dest->str = logformat_str_dup(orig->str); + if (!dest->str) + goto error; + } + if (orig->conf.file) { + dest->conf.file = strdup(orig->conf.file); + if (!dest->conf.file) + goto error; + } + dest->conf.line = orig->conf.line; + + return 1; + + error: + lf_expr_deinit(dest); + return 0; +} + +/* Builds a log line in <dst> based on <lf_expr>, and stops before reaching * <maxsize> characters. Returns the size of the output string in characters, * not counting the trailing zero which is always added if the resulting size * is not zero. It requires a valid session and optionally a stream. If the * stream is NULL, default values will be assumed for the stream part. 
*/ -int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format) +int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct lf_expr *lf_expr) { + struct lf_buildctx *ctx = &lf_buildctx; struct proxy *fe = sess->fe; struct proxy *be; struct http_txn *txn; const struct strm_logs *logs; struct connection *fe_conn, *be_conn; + struct list *list_format = &lf_expr->nodes.list; unsigned int s_flags; unsigned int uniq_id; struct buffer chunk; @@ -2617,6 +3590,8 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t struct strm_logs tmp_strm_log; struct ist path; struct http_uri_parser parser; + int g_options = lf_expr->nodes.options; /* global */ + int first_node = 1; /* FIXME: let's limit ourselves to frontend logging for now. */ @@ -2698,73 +3673,203 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t tmplog = dst; + /* reset static ctx struct */ + ctx->in_text = 0; + + /* start with global ctx by default */ + lf_buildctx_prepare(ctx, g_options, NULL); + /* fill logbuffer */ - if (LIST_ISEMPTY(list_format)) + if (!(ctx->options & LOG_OPT_ENCODE) && lf_expr_isempty(lf_expr)) return 0; + if (ctx->options & LOG_OPT_ENCODE_JSON) + LOGCHAR('{'); + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* start indefinite-length map */ + LOG_CBOR_BYTE(0xBF); + } + list_for_each_entry(tmp, list_format, list) { #ifdef USE_OPENSSL struct connection *conn; #endif const struct sockaddr_storage *addr; const char *src = NULL; + const char *value_beg = NULL; struct sample *key; - const struct buffer empty = { }; - switch (tmp->type) { - case LOG_FMT_SEPARATOR: - if (!last_isspace) { + /* first start with basic types (use continue statement to skip + * the current node) + */ + if (tmp->type == LOG_FMT_SEPARATOR) { + if (g_options & LOG_OPT_ENCODE) { + /* ignored when global encoding is set */ + continue; + } + if (!last_isspace) { + 
LOGCHAR(' '); + last_isspace = 1; + } + continue; + } + else if (tmp->type == LOG_FMT_TEXT) { + /* text */ + if (g_options & LOG_OPT_ENCODE) { + /* ignored when global encoding is set */ + continue; + } + src = tmp->arg; + iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); + if (iret == 0) + goto out; + tmplog += iret; + last_isspace = 0; /* data was written */ + continue; + } + + /* dynamic types handling (use "goto next_fmt" statement to skip + * the current node) + */ + + if (g_options & LOG_OPT_ENCODE) { + /* only consider global ctx for key encoding */ + lf_buildctx_prepare(ctx, g_options, NULL); + + if (!tmp->name) + goto next_fmt; /* cannot represent anonymous field, ignore */ + + if (!first_node) { + if (ctx->options & LOG_OPT_ENCODE_JSON) { + LOGCHAR(','); LOGCHAR(' '); - last_isspace = 1; } - break; + } - case LOG_FMT_TEXT: // text - src = tmp->arg; - iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE_JSON) { + LOGCHAR('"'); + iret = strlcpy2(tmplog, tmp->name, dst + maxsize - tmplog); if (iret == 0) goto out; tmplog += iret; - last_isspace = 0; - break; + LOGCHAR('"'); + LOGCHAR(':'); + LOGCHAR(' '); + } + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + ret = cbor_encode_text(&ctx->encode.cbor, tmplog, + dst + maxsize, tmp->name, + strlen(tmp->name)); + if (ret == NULL) + goto out; + tmplog = ret; + } - case LOG_FMT_EXPR: // sample expression, may be request or response - key = NULL; - if (tmp->options & LOG_OPT_REQ_CAP) - key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + first_node = 0; + } + value_beg = tmplog; + + /* get the chance to consider per-node options (if not already + * set globally) for printing the value + */ + lf_buildctx_prepare(ctx, g_options, tmp); - if (!key && (tmp->options & LOG_OPT_RES_CAP)) - key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + if (tmp->type == LOG_FMT_EXPR) { + /* sample expression, 
may be request or response */ + int type; - if (!key && !(tmp->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli - key = sample_fetch_as_type(be, sess, s, SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + key = NULL; + if (ctx->options & LOG_OPT_REQ_CAP) + key = sample_process(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, NULL); - if (tmp->options & LOG_OPT_HTTP) + if (!key && (ctx->options & LOG_OPT_RES_CAP)) + key = sample_process(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, NULL); + + if (!key && !(ctx->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli + key = sample_process(be, sess, s, SMP_OPT_FINAL, tmp->expr, NULL); + + type = SMP_T_STR; // default + + if (key && key->data.type == SMP_T_BIN && + (ctx->options & LOG_OPT_BIN)) { + /* output type is binary, and binary option is set: + * preserve output type unless typecast is set to + * force output type to string + */ + if (ctx->typecast != SMP_T_STR) + type = SMP_T_BIN; + } + + /* if encoding is set, try to preserve output type + * with respect to typecast settings + * (ie: str, sint, bool) + * + * Special case for cbor encoding: we also try to + * preserve bin output type since cbor encoders + * know how to deal with binary data. + */ + if (ctx->options & LOG_OPT_ENCODE) { + if (ctx->typecast == SMP_T_STR || + ctx->typecast == SMP_T_SINT || + ctx->typecast == SMP_T_BOOL) { + /* enforce type */ + type = ctx->typecast; + } + else if (key && + (key->data.type == SMP_T_SINT || + key->data.type == SMP_T_BOOL || + ((ctx->options & LOG_OPT_ENCODE_CBOR) && + key->data.type == SMP_T_BIN))) { + /* preserve type */ + type = key->data.type; + } + } + + if (key && !sample_convert(key, type)) + key = NULL; + if (ctx->options & LOG_OPT_HTTP) + ret = lf_encode_chunk(tmplog, dst + maxsize, + '%', http_encode_map, key ? &key->data.u.str : &empty, ctx); + else { + if (key && type == SMP_T_BIN) ret = lf_encode_chunk(tmplog, dst + maxsize, - '%', http_encode_map, key ? 
&key->data.u.str : &empty, tmp); + 0, no_escape_map, + &key->data.u.str, + ctx); + else if (key && type == SMP_T_SINT) + ret = lf_int_encode(tmplog, dst + maxsize - tmplog, + key->data.u.sint, ctx); + else if (key && type == SMP_T_BOOL) + ret = lf_bool_encode(tmplog, dst + maxsize - tmplog, + key->data.u.sint, ctx); else ret = lf_text_len(tmplog, - key ? key->data.u.str.area : NULL, - key ? key->data.u.str.data : 0, - dst + maxsize - tmplog, - tmp); - if (ret == 0) - goto out; - tmplog = ret; - last_isspace = 0; - break; + key ? key->data.u.str.area : NULL, + key ? key->data.u.str.data : 0, + dst + maxsize - tmplog, + ctx); + } + if (ret == NULL) + goto out; + tmplog = ret; + last_isspace = 0; /* consider that data was written */ + goto next_fmt; + } + + BUG_ON(tmp->type != LOG_FMT_ALIAS); + /* logformat alias */ + switch (tmp->alias->type) { case LOG_FMT_CLIENTIP: // %ci addr = (s ? sc_src(s->scf) : sess_src(sess)); if (addr) - ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CLIENTPORT: // %cp @@ -2772,30 +3877,29 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (addr) { /* sess->listener is always defined when the session's owner is an inbound connections */ if (addr->ss_family == AF_UNIX) - ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, + sess->listener->luid, ctx, LF_INT_LTOA); else - ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); } else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, 
NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTENDIP: // %fi addr = (s ? sc_dst(s->scf) : sess_dst(sess)); if (addr) - ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTENDPORT: // %fp @@ -2803,184 +3907,233 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (addr) { /* sess->listener is always defined when the session's owner is an inbound connections */ if (addr->ss_family == AF_UNIX) - ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, + sess->listener->luid, ctx, LF_INT_LTOA); else - ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); } else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BACKENDIP: // %bi if (be_conn && conn_get_src(be_conn)) - ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BACKENDPORT: // %bp if (be_conn && conn_get_src(be_conn)) - ret = lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp); + ret = 
lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SERVERIP: // %si if (be_conn && conn_get_dst(be_conn)) - ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SERVERPORT: // %sp if (be_conn && conn_get_dst(be_conn)) - ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_DATE: // %t = accept date + { + // "26/Apr/2024:09:39:58.774" + get_localtime(logs->accept_date.tv_sec, &tm); - ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!date2str_log(ctx->_buf, &tm, &logs->accept_date, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_tr: // %tr = start of request date + { + // "26/Apr/2024:09:39:58.774" + /* Note that the timers are valid if we get here */ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_localtime(tv.tv_sec, &tm); - ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!date2str_log(ctx->_buf, &tm, &tv, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_DATEGMT: // %T = accept date, GMT + { + // "26/Apr/2024:07:41:11 +0000" + get_gmtime(logs->accept_date.tv_sec, &tm); - ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!gmt2str_log(ctx->_buf, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_trg: // %trg = start of request date, GMT + { + // "26/Apr/2024:07:41:11 +0000" + tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_gmtime(tv.tv_sec, &tm); - ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!gmt2str_log(ctx->_buf, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_DATELOCAL: // %Tl = accept date, local + { + // "26/Apr/2024:09:42:32 +0200" + get_localtime(logs->accept_date.tv_sec, &tm); - ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!localdate2str_log(ctx->_buf, logs->accept_date.tv_sec, + &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, + &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_trl: // %trl = start of request date, local + { + // "26/Apr/2024:09:42:32 +0200" + tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_localtime(tv.tv_sec, &tm); - ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!localdate2str_log(ctx->_buf, tv.tv_sec, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TS: // %Ts - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", (unsigned int)logs->accept_date.tv_sec); - if (iret < 0 || iret > dst + maxsize - tmplog) + { + unsigned long value = logs->accept_date.tv_sec; + + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", (unsigned int)value); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(logs->accept_date.tv_sec, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); } - break; + if (ret == NULL) + goto out; + tmplog = ret; + break; + } case LOG_FMT_MS: // %ms - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%02X",(unsigned int)logs->accept_date.tv_usec/1000); - if (iret < 0 || iret > dst + maxsize - tmplog) + { + unsigned int value = (unsigned int)logs->accept_date.tv_usec/1000; + + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X", value); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; - } else { - if ((dst + maxsize - tmplog) < 4) - goto out; - ret = utoa_pad((unsigned int)logs->accept_date.tv_usec/1000, - tmplog, 4); + ret = lf_rawtext(tmplog, 
ctx->_buf, dst + maxsize - tmplog, ctx); + } else { + ret = lf_int(tmplog, dst + maxsize - tmplog, value, + ctx, LF_INT_UTOA_PAD_4); + } if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; + break; } - break; case LOG_FMT_FRONTEND: // %f src = fe->id; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTEND_XPRT: // %ft src = fe->id; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); - if (iret == 0) + LOG_VARTEXT_START(); + ret = lf_rawtext(tmplog, src, dst + maxsize - tmplog, ctx); + if (ret == NULL) goto out; - tmplog += iret; + tmplog = ret; /* sess->listener may be undefined if the session's owner is a health-check */ if (sess->listener && sess->listener->bind_conf->xprt->get_ssl_sock_ctx) LOGCHAR('~'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; #ifdef USE_OPENSSL case LOG_FMT_SSL_CIPHER: // %sslc @@ -2989,11 +4142,10 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (conn) { src = ssl_sock_get_cipher_name(conn); } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SSL_VERSION: // %sslv @@ -3002,20 +4154,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (conn) { src = ssl_sock_get_proto_version(conn); } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; #endif case LOG_FMT_BACKEND: // %b src = be->id; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - 
last_isspace = 0; break; case LOG_FMT_SERVER: // %s @@ -3035,293 +4185,310 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t src = "<NOSRV>"; break; } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_Th: // %Th = handshake time - ret = ltoa_o(logs->t_handshake, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_handshake, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_Ti: // %Ti = HTTP idle time - ret = ltoa_o(logs->t_idle, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_idle, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TR: // %TR = HTTP request time - ret = ltoa_o((t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TQ: // %Tq = Th + Ti + TR - ret = ltoa_o(t_request, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, t_request, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TW: // %Tw - ret = ltoa_o((logs->t_queue >= 0) ? logs->t_queue - t_request : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_queue >= 0) ? logs->t_queue - t_request : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TC: // %Tc - ret = ltoa_o((logs->t_connect >= 0) ? 
logs->t_connect - logs->t_queue : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_connect >= 0) ? logs->t_connect - logs->t_queue : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_Tr: // %Tr - ret = ltoa_o((logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TD: // %Td + { + long value; + if (be->mode == PR_MODE_HTTP) - ret = ltoa_o((logs->t_data >= 0) ? logs->t_close - logs->t_data : -1, - tmplog, dst + maxsize - tmplog); + value = (logs->t_data >= 0) ? logs->t_close - logs->t_data : -1; else - ret = ltoa_o((logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1, - tmplog, dst + maxsize - tmplog); + value = (logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); + if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_Ta: // %Ta = active time = Tt - Th - Ti + { + long value = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0); + if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0), - tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TT: // %Tt = total time if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close, tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_close, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TU: // %Tu = total time seen by user = Tt - Ti + { + long value = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0); + if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0), - tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_STATUS: // %ST - ret = ltoa_o(status, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, status, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BYTES: // %B if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = lltoa(logs->bytes_out, tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->bytes_out, ctx, LF_INT_LLTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BYTES_UP: // %U - ret = lltoa(logs->bytes_in, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->bytes_in, ctx, LF_INT_LLTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CCLIENT: // %CC src = txn ? 
txn->cli_cookie : NULL; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CSERVER: // %CS src = txn ? txn->srv_cookie : NULL; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TERMSTATE: // %ts - LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]); - LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]); - *tmplog = '\0'; - last_isspace = 0; + { + ctx->_buf[0] = sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]; + ctx->_buf[1] = sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]; + ret = lf_rawtext_len(tmplog, ctx->_buf, 2, maxsize - (tmplog - dst), ctx); + if (ret == NULL) + goto out; + tmplog = ret; break; + } case LOG_FMT_TERMSTATE_CK: // %tsc, same as TS with cookie state (for mode HTTP) - LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]); - LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]); - LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-'); - LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-'); - last_isspace = 0; + { + ctx->_buf[0] = sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]; + ctx->_buf[1] = sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]; + ctx->_buf[2] = (txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-'; + ctx->_buf[3] = (txn && (be->ck_opts & PR_CK_ANY)) ? 
sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-'; + ret = lf_rawtext_len(tmplog, ctx->_buf, 4, maxsize - (tmplog - dst), ctx); + if (ret == NULL) + goto out; + tmplog = ret; break; + } case LOG_FMT_ACTCONN: // %ac - ret = ltoa_o(actconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, actconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FECONN: // %fc - ret = ltoa_o(fe->feconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, fe->feconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BECONN: // %bc - ret = ltoa_o(be->beconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, be->beconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SRVCONN: // %sc + { + unsigned long value; + switch (obj_type(s ? s->target : sess->origin)) { case OBJ_TYPE_SERVER: - ret = ultoa_o(__objt_server(s->target)->cur_sess, - tmplog, dst + maxsize - tmplog); + value = __objt_server(s->target)->cur_sess; break; case OBJ_TYPE_CHECK: - ret = ultoa_o(__objt_check(sess->origin)->server - ? __objt_check(sess->origin)->server->cur_sess - : 0, tmplog, dst + maxsize - tmplog); + value = (__objt_check(sess->origin)->server + ? __objt_check(sess->origin)->server->cur_sess + : 0); break; default: - ret = ultoa_o(0, tmplog, dst + maxsize - tmplog); + value = 0; break; } + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_ULTOA); + if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_RETRIES: // %rc + { + long int value = (s ? s->conn_retries : 0); + if (s_flags & SF_REDISP) - LOGCHAR('+'); - ret = ltoa_o((s ? 
s->conn_retries : 0), tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_SRVQUEUE: // %sq - ret = ltoa_o(logs->srv_queue_pos, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->srv_queue_pos, + ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BCKQUEUE: // %bq - ret = ltoa_o(logs->prx_queue_pos, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->prx_queue_pos, + ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_HDRREQUEST: // %hr /* request header */ if (fe->nb_req_cap && s && s->req_cap) { - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); LOGCHAR('{'); for (hdr = 0; hdr < fe->nb_req_cap; hdr++) { if (hdr) LOGCHAR('|'); if (s->req_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->req_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->req_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; } } LOGCHAR('}'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; } break; case LOG_FMT_HDRREQUESTLIST: // %hrl /* request header list */ if (fe->nb_req_cap && s && s->req_cap) { + LOG_STRARRAY_START(); for (hdr = 0; hdr < fe->nb_req_cap; hdr++) { if (hdr > 0) - LOGCHAR(' '); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_STRARRAY_NEXT(); + LOG_VARTEXT_START(); if (s->req_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->req_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->req_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; - } else if (!(tmp->options & LOG_OPT_QUOTE)) + } else if (!(ctx->options & LOG_OPT_QUOTE)) LOGCHAR('-'); - if 
(tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; + /* Manually end variable text as we're emitting multiple + * texts at once + */ + LOG_VARTEXT_END(); } + LOG_STRARRAY_END(); } break; @@ -3329,70 +4496,63 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t case LOG_FMT_HDRRESPONS: // %hs /* response header */ if (fe->nb_rsp_cap && s && s->res_cap) { - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); LOGCHAR('{'); for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) { if (hdr) LOGCHAR('|'); if (s->res_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->res_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->res_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; } } LOGCHAR('}'); - last_isspace = 0; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); } break; case LOG_FMT_HDRRESPONSLIST: // %hsl /* response header list */ if (fe->nb_rsp_cap && s && s->res_cap) { + LOG_STRARRAY_START(); for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) { if (hdr > 0) - LOGCHAR(' '); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_STRARRAY_NEXT(); + LOG_VARTEXT_START(); if (s->res_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->res_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->res_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; - } else if (!(tmp->options & LOG_OPT_QUOTE)) + } else if (!(ctx->options & LOG_OPT_QUOTE)) LOGCHAR('-'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; + /* Manually end variable text as we're emitting multiple + * texts at once + */ + LOG_VARTEXT_END(); } + LOG_STRARRAY_END(); } break; case LOG_FMT_REQ: // %r /* Request */ - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; ret = lf_encode_string(tmplog, dst + maxsize, - '#', url_encode_map, uri, tmp); - if (ret == NULL || *ret != '\0') + '#', url_encode_map, uri, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_PATH: // %HP uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3417,22 +4577,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_PATH_ONLY: // %HPO uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); @@ -3463,20 +4619,16 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = path.len; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_QUERY: // %HQ - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); if (!txn || !txn->uri) { chunk.area = "<BADREQ>"; @@ -3497,22 +4649,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = uri - qmark; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_URI: // %HU uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3537,21 +4685,17 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_METHOD: // %HM uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3567,21 +4711,17 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_VERSION: // %HV uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3612,87 +4752,121 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = end - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_COUNTER: // %rt - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", uniq_id); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", uniq_id); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(uniq_id, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, uniq_id, ctx, LF_INT_LTOA); } + if (ret == NULL) + goto out; + tmplog = 
ret; break; case LOG_FMT_LOGCNT: // %lc - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", fe->log_count); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", fe->log_count); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ultoa_o(fe->log_count, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, fe->log_count, + ctx, LF_INT_ULTOA); } + if (ret == NULL) + goto out; + tmplog = ret; break; case LOG_FMT_HOSTNAME: // %H src = hostname; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_PID: // %pid - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", pid); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(pid, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, pid, ctx, LF_INT_LTOA); } + if (ret == NULL) + goto out; + tmplog = ret; break; case LOG_FMT_UNIQUEID: // %ID ret = NULL; if (s) - ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), tmp); + ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), ctx); else - ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), tmp); + 
ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; } + next_fmt: + if (value_beg == tmplog) { + /* handle the case where no data was generated for the value after + * the key was already announced + */ + if (ctx->options & LOG_OPT_ENCODE_JSON) { + /* for JSON, we simply output 'null' */ + iret = snprintf(tmplog, dst + maxsize - tmplog, "null"); + if (iret < 0 || iret >= dst + maxsize - tmplog) + goto out; + tmplog += iret; + } + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* for CBOR, we have the '22' primitive which is known as + * NULL + */ + LOG_CBOR_BYTE(0xF6); + } + + } + + /* if variable text was started for the current node data, we need + * to end it + */ + LOG_VARTEXT_END(); + if (tmplog != value_beg) { + /* data was actually generated for the current dynamic + * node, reset the space hint so that a new space may + * now be emitted when relevant. + */ + last_isspace = 0; + } + } + + /* back to global ctx (some encoding types may need to output + * ending closure) + */ + lf_buildctx_prepare(ctx, g_options, NULL); + + if (ctx->options & LOG_OPT_ENCODE_JSON) + LOGCHAR('}'); + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* end indefinite-length map */ + LOG_CBOR_BYTE(0xFF); } out: @@ -3738,11 +4912,11 @@ void strm_log(struct stream *s) } /* if unique-id was not generated */ - if (!isttest(s->unique_id) && !LIST_ISEMPTY(&sess->fe->format_unique_id)) { + if (!isttest(s->unique_id) && !lf_expr_isempty(&sess->fe->format_unique_id)) { stream_generate_unique_id(s, &sess->fe->format_unique_id); } - if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) { + if (!lf_expr_isempty(&sess->fe->logformat_sd)) { sd_size = build_logline(s, logline_rfc5424, global.max_syslog_len, &sess->fe->logformat_sd); } @@ -3780,13 +4954,13 @@ void sess_log(struct session *sess) if (sess->fe->options2 & PR_O2_LOGERRORS) level = LOG_ERR; - if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) { + if 
(!lf_expr_isempty(&sess->fe->logformat_sd)) { sd_size = sess_build_logline(sess, NULL, logline_rfc5424, global.max_syslog_len, &sess->fe->logformat_sd); } - if (!LIST_ISEMPTY(&sess->fe->logformat_error)) + if (!lf_expr_isempty(&sess->fe->logformat_error)) size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat_error); else size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat); @@ -4212,7 +5386,7 @@ static void syslog_io_handler(struct appctx *appctx) char *message; size_t size; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -4337,6 +5511,40 @@ static struct applet syslog_applet = { .release = NULL, }; +/* Atomically append an event to applet >ctx>'s output, prepending it with its + * size in decimal followed by a space. The line is read from vectors <v1> and + * <v2> at offset <ofs> relative to the area's origin, for <len> bytes. It + * returns the number of bytes consumed from the input vectors on success, -1 + * if it temporarily cannot (buffer full), -2 if it will never be able to (too + * large msg). The input vectors are not modified. The caller is responsible for + * making sure that there are at least ofs+len bytes in the input buffer. 
+ */ +ssize_t syslog_applet_append_event(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len) +{ + struct appctx *appctx = ctx; + char *p; + + /* first, encode the message's size */ + chunk_reset(&trash); + p = ulltoa(len, trash.area, b_size(&trash)); + if (p) { + trash.data = p - trash.area; + trash.area[trash.data++] = ' '; + } + + /* check if the message has a chance to fit */ + if (unlikely(!p || trash.data + len > b_size(&trash))) + return -2; + + /* try to transfer it or report full */ + trash.data += vp_peek_ofs(v1, v2, ofs, trash.area + trash.data, len); + if (applet_putchk(appctx, &trash) == -1) + return -1; + + /* OK done */ + return len; +} + /* * Parse "log-forward" section and create corresponding sink buffer. * @@ -4397,7 +5605,7 @@ int cfg_parse_log_forward(const char *file, int linenum, char **args, int kwm) px->conf.file = strdup(file); px->conf.line = linenum; px->mode = PR_MODE_SYSLOG; - px->last_change = ns_to_sec(now_ns); + px->fe_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_FE; px->maxconn = 10; px->timeout.client = TICK_ETERNITY; @@ -4653,6 +5861,7 @@ static int postresolve_loggers() REGISTER_CONFIG_SECTION("log-forward", cfg_parse_log_forward, NULL); REGISTER_POST_CHECK(postresolve_loggers); REGISTER_POST_PROXY_CHECK(postcheck_log_backend); +REGISTER_POST_PROXY_CHECK(postcheck_logformat_proxy); REGISTER_PER_THREAD_ALLOC(init_log_buffers); REGISTER_PER_THREAD_FREE(deinit_log_buffers); @@ -258,7 +258,7 @@ static long get_value(struct lru64_head *lru, long a) /* do the painful work here */ a = sum(a); if (item) - lru64_commit(item, (void *)a, lru, 1, 0); + lru64_commit(item, (void *)a, lru, 0, 0); return a; } @@ -170,6 +170,34 @@ int sample_load_map(struct arg *arg, struct sample_conv *conv, return 1; } +/* try to match input sample against map entries, returns matched entry's key + * on success + */ +static int sample_conv_map_key(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct map_descriptor 
*desc; + struct pattern *pat; + + /* get config */ + desc = arg_p[0].data.map; + + /* Execute the match function. */ + pat = pattern_exec_match(&desc->pat, smp, 1); + + /* Match case. */ + if (pat) { + smp->data.type = SMP_T_STR; + smp->flags |= SMP_F_CONST; + smp->data.u.str.area = (char *)pat->ref->pattern; + smp->data.u.str.data = strlen(pat->ref->pattern); + return 1; + } + return 0; +} + +/* try to match input sample against map entries, returns matched entry's value + * on success + */ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *private) { struct map_descriptor *desc; @@ -345,22 +373,8 @@ struct show_map_ctx { static int cli_io_handler_pat_list(struct appctx *appctx) { struct show_map_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct pat_ref_elt *elt; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last ref_elt being dumped. 
- */ - if (!LIST_ISEMPTY(&ctx->bref.users)) { - HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock); - LIST_DEL_INIT(&ctx->bref.users); - HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock); - } - return 1; - } - switch (ctx->state) { case STATE_INIT: ctx->state = STATE_LIST; @@ -1226,6 +1240,16 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "map_int_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_ADDR, (void *)PAT_MATCH_INT }, { "map_ip_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_ADDR, (void *)PAT_MATCH_IP }, + { "map_str_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR }, + { "map_beg_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_BEG }, + { "map_sub_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_SUB }, + { "map_dir_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DIR }, + { "map_dom_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM }, + { "map_end_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END }, + { "map_reg_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG }, + { "map_int_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT }, + { "map_ip_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP }, + { /* END */ }, }}; diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c index 448d8bb..102a4f0 100644 --- a/src/mux_fcgi.c +++ b/src/mux_fcgi.c @@ -488,14 +488,14 @@ static int fcgi_buf_available(void *target) struct fcgi_conn *fconn = target; struct fcgi_strm *fstrm; - if ((fconn->flags & FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf)) { + if ((fconn->flags & 
FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf, DB_MUX_RX)) { TRACE_STATE("unblocking fconn, dbuf allocated", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn); fconn->flags &= ~FCGI_CF_DEM_DALLOC; fcgi_conn_restart_reading(fconn, 1); return 1; } - if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf))) { + if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf), DB_MUX_TX)) { TRACE_STATE("unblocking fconn, mbuf allocated", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn); fconn->flags &= ~FCGI_CF_MUX_MALLOC; if (fconn->flags & FCGI_CF_DEM_MROOM) { @@ -507,7 +507,7 @@ static int fcgi_buf_available(void *target) if ((fconn->flags & FCGI_CF_DEM_SALLOC) && (fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi)) && fcgi_strm_sc(fstrm) && - b_alloc(&fstrm->rxbuf)) { + b_alloc(&fstrm->rxbuf, DB_SE_RX)) { TRACE_STATE("unblocking fstrm, rxbuf allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK|FCGI_EV_STRM_WAKE, fconn->conn, fstrm); fconn->flags &= ~FCGI_CF_DEM_SALLOC; fcgi_conn_restart_reading(fconn, 1); @@ -523,10 +523,8 @@ static inline struct buffer *fcgi_get_buf(struct fcgi_conn *fconn, struct buffer struct buffer *buf = NULL; if (likely(!LIST_INLIST(&fconn->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - fconn->buf_wait.target = fconn; - fconn->buf_wait.wakeup_cb = fcgi_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &fconn->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_MUX_RX)) == NULL)) { + b_queue(DB_MUX_RX, &fconn->buf_wait, fconn, fcgi_buf_available); } return buf; } @@ -755,8 +753,7 @@ static void fcgi_release(struct fcgi_conn *fconn) TRACE_POINT(FCGI_EV_FCONN_END); - if (LIST_INLIST(&fconn->buf_wait.list)) - LIST_DEL_INIT(&fconn->buf_wait.list); + b_dequeue(&fconn->buf_wait); fcgi_release_buf(fconn, &fconn->dbuf); fcgi_release_mbuf(fconn); @@ -3089,7 +3086,9 @@ static int fcgi_wake(struct connection *conn) static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, 
void *output) { + struct fcgi_conn *fconn = conn->ctx; int ret = 0; + switch (mux_ctl) { case MUX_CTL_STATUS: if (!(conn->flags & CO_FL_WAIT_XPRT)) @@ -3097,6 +3096,10 @@ static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *ou return ret; case MUX_CTL_EXIT_STATUS: return MUX_ES_UNKNOWN; + case MUX_CTL_GET_NBSTRM: + return fconn->nb_streams; + case MUX_CTL_GET_MAXSTRM: + return fconn->streams_limit; default: return -1; } @@ -3581,6 +3584,10 @@ static void fcgi_detach(struct sedesc *sd) } } if (eb_is_empty(&fconn->streams_by_id)) { + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&fconn->wait_event.tasklet->state, TASK_F_USR1); if (session_check_idle_conn(fconn->conn->owner, fconn->conn) != 0) { /* The connection is destroyed, let's leave */ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR); @@ -3619,7 +3626,7 @@ static void fcgi_detach(struct sedesc *sd) } else if (!fconn->conn->hash_node->node.node.leaf_p && fcgi_avail_streams(fconn->conn) > 0 && objt_server(fconn->conn->target) && - !LIST_INLIST(&fconn->conn->session_list)) { + !LIST_INLIST(&fconn->conn->sess_el)) { srv_add_to_avail_list(__objt_server(fconn->conn->target), fconn->conn); } } @@ -3787,24 +3794,16 @@ struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state) return NULL; } -/* shutr() called by the stream connector (mux_ops.shutr) */ -static void fcgi_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct fcgi_strm *fstrm = __sc_mux_strm(sc); - - TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); - if (!mode) - return; - fcgi_do_shutr(fstrm); -} - -/* shutw() called by the stream connector (mux_ops.shutw) */ -static void fcgi_shutw(struct stconn *sc, enum co_shw_mode mode) +static void fcgi_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct fcgi_strm *fstrm = 
__sc_mux_strm(sc); - TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); - fcgi_do_shutw(fstrm); + TRACE_ENTER(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) + fcgi_do_shutw(fstrm); + if (mode & SE_SHR_RESET) + fcgi_do_shutr(fstrm); + TRACE_LEAVE(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); } /* Called from the upper layer, to subscribe <es> to events <event_type>. The @@ -4163,25 +4162,35 @@ static int fcgi_show_fd(struct buffer *msg, struct connection *conn) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. */ -static int fcgi_takeover(struct connection *conn, int orig_tid) +static int fcgi_takeover(struct connection *conn, int orig_tid, int release) { struct fcgi_conn *fcgi = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. 
+ */ + if (LIST_INLIST(&fcgi->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -4208,8 +4217,10 @@ static int fcgi_takeover(struct connection *conn, int orig_tid) fcgi->task = new_task; new_task = NULL; - fcgi->task->process = fcgi_timeout_task; - fcgi->task->context = fcgi; + if (!release) { + fcgi->task->process = fcgi_timeout_task; + fcgi->task->context = fcgi; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -4219,10 +4230,26 @@ static int fcgi_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid); fcgi->wait_event.tasklet = new_tasklet; - fcgi->wait_event.tasklet->process = fcgi_io_cb; - fcgi->wait_event.tasklet->context = fcgi; - fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx, - SUB_RETRY_RECV, &fcgi->wait_event); + if (!release) { + fcgi->wait_event.tasklet->process = fcgi_io_cb; + fcgi->wait_event.tasklet->context = fcgi; + fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx, + SUB_RETRY_RECV, &fcgi->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. 
+ */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&fcgi->buf_wait.list)) + _b_dequeue(&fcgi->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -4252,8 +4279,7 @@ static const struct mux_ops mux_fcgi_ops = { .snd_buf = fcgi_snd_buf, .subscribe = fcgi_subscribe, .unsubscribe = fcgi_unsubscribe, - .shutr = fcgi_shutr, - .shutw = fcgi_shutw, + .shut = fcgi_shut, .ctl = fcgi_ctl, .sctl = fcgi_sctl, .show_fd = fcgi_show_fd, diff --git a/src/mux_h1.c b/src/mux_h1.c index 6593661..6bdaf71 100644 --- a/src/mux_h1.c +++ b/src/mux_h1.c @@ -227,7 +227,7 @@ enum { }; -static struct name_desc h1_stats[] = { +static struct stat_col h1_stats[] = { [H1_ST_OPEN_CONN] = { .name = "h1_open_connections", .desc = "Count of currently open connections" }, [H1_ST_OPEN_STREAM] = { .name = "h1_open_streams", @@ -264,21 +264,54 @@ static struct h1_counters { #endif } h1_counters; -static void h1_fill_stats(void *data, struct field *stats) +static int h1_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct h1_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[H1_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns); - stats[H1_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams); - stats[H1_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns); - stats[H1_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams); + for (; current_field < H1_STATS_COUNT; current_field++) { + struct field metric = { 0 }; - stats[H1_ST_BYTES_IN] = mkf_u64(FN_COUNTER, counters->bytes_in); - stats[H1_ST_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->bytes_out); + switch (current_field) { + case H1_ST_OPEN_CONN: + metric = mkf_u64(FN_GAUGE, counters->open_conns); + break; + case H1_ST_OPEN_STREAM: + metric = mkf_u64(FN_GAUGE, counters->open_streams); + break; + case H1_ST_TOTAL_CONN: + metric = mkf_u64(FN_COUNTER, counters->total_conns); + break; + case H1_ST_TOTAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->total_streams); + break; + case H1_ST_BYTES_IN: + metric = mkf_u64(FN_COUNTER, counters->bytes_in); + break; + case H1_ST_BYTES_OUT: + metric = mkf_u64(FN_COUNTER, counters->bytes_out); + break; #if defined(USE_LINUX_SPLICE) - stats[H1_ST_SPLICED_BYTES_IN] = mkf_u64(FN_COUNTER, counters->spliced_bytes_in); - stats[H1_ST_SPLICED_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->spliced_bytes_out); + case H1_ST_SPLICED_BYTES_IN: + metric = mkf_u64(FN_COUNTER, counters->spliced_bytes_in); + break; + case H1_ST_SPLICED_BYTES_OUT: + metric = mkf_u64(FN_COUNTER, counters->spliced_bytes_out); + break; #endif + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module h1_stats_module = { @@ -302,6 +335,8 @@ DECLARE_STATIC_POOL(pool_head_h1s, "h1s", sizeof(struct h1s)); static int h1_recv(struct h1c *h1c); static int h1_send(struct h1c *h1c); static int h1_process(struct h1c *h1c); +static void h1_release(struct h1c *h1c); + /* h1_io_cb is exported to see it resolved in "show fd" */ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state); struct task *h1_timeout_task(struct task *t, void *context, unsigned int state); @@ -466,45 +501,91 @@ static int h1_buf_available(void *target) { struct h1c *h1c = target; - if ((h1c->flags & H1C_F_IN_ALLOC) && b_alloc(&h1c->ibuf)) { - TRACE_STATE("unblocking h1c, ibuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + if (h1c->flags & H1C_F_IN_ALLOC) { h1c->flags &= ~H1C_F_IN_ALLOC; - if (h1_recv_allowed(h1c)) - tasklet_wakeup(h1c->wait_event.tasklet); - return 1; + h1c->flags |= H1C_F_IN_MAYALLOC; } - if ((h1c->flags & H1C_F_OUT_ALLOC) && b_alloc(&h1c->obuf)) { - TRACE_STATE("unblocking h1s, obuf allocated", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s); + if ((h1c->flags & H1C_F_OUT_ALLOC) && h1c->h1s) { + TRACE_STATE("unblocking h1s, obuf allocatable", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s); h1c->flags &= ~H1C_F_OUT_ALLOC; - if (h1c->h1s) - h1_wake_stream_for_send(h1c->h1s); - return 1; + h1c->flags |= H1C_F_OUT_MAYALLOC; + h1_wake_stream_for_send(h1c->h1s); } - if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s && b_alloc(&h1c->h1s->rxbuf)) { - TRACE_STATE("unblocking h1c, stream rxbuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s) { + TRACE_STATE("unblocking h1c, stream rxbuf allocatable", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); h1c->flags &= 
~H1C_F_IN_SALLOC; + h1c->flags |= H1C_F_IN_SMAYALLOC; tasklet_wakeup(h1c->wait_event.tasklet); - return 1; } - return 0; + if ((h1c->flags & H1C_F_IN_MAYALLOC) && h1_recv_allowed(h1c)) { + TRACE_STATE("unblocking h1c, ibuf allocatable", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + tasklet_wakeup(h1c->wait_event.tasklet); + } + + return 1; +} + +/* + * Allocate the h1c's ibuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_IN_ALLOC flag on the connection. It will advertise a more + * urgent allocation when a stream is already present than when none is present + * since in one case a buffer might be needed to permit to release another one, + * while in the other case we've simply not started anything. + */ +static inline struct buffer *h1_get_ibuf(struct h1c *h1c) +{ + struct buffer *buf; + + if (unlikely((buf = b_alloc(&h1c->ibuf, DB_MUX_RX | + ((h1c->flags & H1C_F_IN_MAYALLOC) ? DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_MUX_RX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_IN_ALLOC; + } + else + h1c->flags &= ~H1C_F_IN_MAYALLOC; + + return buf; } /* - * Allocate a buffer. If if fails, it adds the mux in buffer wait queue. + * Allocate the h1c's obuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_OUT_ALLOC flag on the connection. */ -static inline struct buffer *h1_get_buf(struct h1c *h1c, struct buffer *bptr) +static inline struct buffer *h1_get_obuf(struct h1c *h1c) { - struct buffer *buf = NULL; + struct buffer *buf; - if (likely(!LIST_INLIST(&h1c->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - h1c->buf_wait.target = h1c; - h1c->buf_wait.wakeup_cb = h1_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &h1c->buf_wait.list); + if (unlikely((buf = b_alloc(&h1c->obuf, DB_MUX_TX | + ((h1c->flags & H1C_F_OUT_MAYALLOC) ? 
DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_MUX_TX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_OUT_ALLOC; } + else + h1c->flags &= ~H1C_F_OUT_MAYALLOC; + + return buf; +} + +/* + * Allocate the h1s's rxbuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_IN_SALLOC flag on the connection. + */ +static inline struct buffer *h1_get_rxbuf(struct h1s *h1s) +{ + struct h1c *h1c = h1s->h1c; + struct buffer *buf; + + if (unlikely((buf = b_alloc(&h1s->rxbuf, DB_SE_RX | + ((h1c->flags & H1C_F_IN_SMAYALLOC) ? DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_SE_RX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_IN_SALLOC; + } + else + h1c->flags &= ~H1C_F_IN_SMAYALLOC; + return buf; } @@ -521,11 +602,11 @@ static inline void h1_release_buf(struct h1c *h1c, struct buffer *bptr) } /* Returns 1 if the H1 connection is alive (IDLE, EMBRYONIC, RUNNING or - * RUNNING). Ortherwise 0 is returned. + * DRAINING). Ortherwise 0 is returned. */ static inline int h1_is_alive(const struct h1c *h1c) { - return (h1c->state <= H1_CS_RUNNING); + return (h1c->state <= H1_CS_DRAINING); } /* Switch the H1 connection to CLOSING or CLOSED mode, depending on the output @@ -869,7 +950,8 @@ static void h1s_destroy(struct h1s *h1s) h1_release_buf(h1c, &h1s->rxbuf); h1c->flags &= ~(H1C_F_WANT_FASTFWD| - H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_IN_SALLOC| + H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_OUT_MAYALLOC| + H1C_F_IN_SALLOC|H1C_F_IN_SMAYALLOC| H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER); if (!(h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR|H1C_F_ABRT_PENDING|H1C_F_ABRTED)) && /* No error/read0/abort */ @@ -893,6 +975,162 @@ static void h1s_destroy(struct h1s *h1s) } } + +/* Check if shutdown performed of an an H1S must lead to a connection shutdown + * of if it can be kept alive. It returns 1 if the connection must be shut down + * and 0 it if can be kept alive. 
+ */ +static int h1s_must_shut_conn(struct h1s *h1s) +{ + struct h1c *h1c = h1s->h1c; + int ret; + + TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s); + + if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) { + TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 1; + } + else if (h1c->state == H1_CS_CLOSING || (h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR))) { + TRACE_STATE("shutdown on connection (EOS || CLOSING || ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 1; + } + else if (h1c->state == H1_CS_UPGRADING) { + TRACE_STATE("keep connection alive (UPGRADING)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else if (!(h1c->flags & H1C_F_IS_BACK) && h1s->req.state != H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) { + TRACE_STATE("defer shutdown to drain request first", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) { + TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else { + /* The default case, do the shutdown */ + ret = 1; + } + + TRACE_LEAVE(H1_EV_STRM_SHUT, h1c->conn, h1s); + return ret; +} + +/* Really detach the H1S. Most of time of it called from h1_detach() when the + * stream is detached from the connection. But if the request message must be + * drained first, the detach is deferred. + */ +static void h1s_finish_detach(struct h1s *h1s) +{ + struct h1c *h1c; + struct session *sess; + int is_not_first; + + TRACE_ENTER(H1_EV_STRM_END, h1s ? h1s->h1c->conn : NULL, h1s); + + sess = h1s->sess; + h1c = h1s->h1c; + + sess->accept_date = date; + sess->accept_ts = now_ns; + sess->t_handshake = 0; + sess->t_idle = -1; + + is_not_first = h1s->flags & H1S_F_NOT_FIRST; + h1s_destroy(h1s); + + if (h1c->state == H1_CS_IDLE && (h1c->flags & H1C_F_IS_BACK)) { + /* this connection may be killed at any moment, we want it to + * die "cleanly" (i.e. only an RST). 
+ */ + h1c->flags |= H1C_F_SILENT_SHUT; + + /* If there are any excess server data in the input buffer, + * release it and close the connection ASAP (some data may + * remain in the output buffer). This happens if a server sends + * invalid responses. So in such case, we don't want to reuse + * the connection + */ + if (b_data(&h1c->ibuf)) { + h1_release_buf(h1c, &h1c->ibuf); + h1_close(h1c); + TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END); + goto release; + } + + if (h1c->conn->flags & CO_FL_PRIVATE) { + /* Add the connection in the session server list, if not already done */ + if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) { + h1c->conn->owner = NULL; + h1c->conn->mux->destroy(h1c); + goto end; + } + /* Always idle at this step */ + + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + if (session_check_idle_conn(sess, h1c->conn)) { + /* The connection got destroyed, let's leave */ + TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); + goto end; + } + } + else { + if (h1c->conn->owner == sess) + h1c->conn->owner = NULL; + + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. 
+ */ + HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); + xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx); + + if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) { + /* The server doesn't want it, let's kill the connection right away */ + h1c->conn->mux->destroy(h1c); + TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); + goto end; + } + /* At this point, the connection has been added to the + * server idle list, so another thread may already have + * hijacked it, so we can't do anything with it. + */ + return; + } + } + + release: + /* We don't want to close right now unless the connection is in error or shut down for writes */ + if ((h1c->flags & H1C_F_ERROR) || + (h1c->state == H1_CS_CLOSED) || + (h1c->state == H1_CS_CLOSING && !b_data(&h1c->obuf)) || + !h1c->conn->owner) { + TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn); + h1_release(h1c); + } + else { + if (h1c->state == H1_CS_IDLE) { + /* If we have a new request, process it immediately or + * subscribe for reads waiting for new data + */ + if (unlikely(b_data(&h1c->ibuf))) { + if (h1_process(h1c) == -1) + goto end; + } + else + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); + } + h1_set_idle_expiration(h1c); + h1_refresh_timeout(h1c); + } + end: + TRACE_LEAVE(H1_EV_STRM_END); +} + + /* * Initialize the mux once it's attached. 
It is expected that conn->ctx points * to the existing stream connector (for outgoing connections or for incoming @@ -1049,9 +1287,7 @@ static void h1_release(struct h1c *h1c) } - if (LIST_INLIST(&h1c->buf_wait.list)) - LIST_DEL_INIT(&h1c->buf_wait.list); - + b_dequeue(&h1c->buf_wait); h1_release_buf(h1c, &h1c->ibuf); h1_release_buf(h1c, &h1c->obuf); @@ -1416,21 +1652,33 @@ static void h1_capture_bad_message(struct h1c *h1c, struct h1s *h1s, &ctx, h1_show_error_snapshot); } -/* Emit the chunksize followed by a CRLF in front of data of the buffer +/* Emit the chunk size <chksz> followed by a CRLF in front of data of the buffer * <buf>. It goes backwards and starts with the byte before the buffer's * head. The caller is responsible for ensuring there is enough room left before - * the buffer's head for the string. + * the buffer's head for the string. if <length> is greater than 0, it + * represents the expected total length of the chunk size, including the + * CRLF. So it will be padded with 0 to resepct this length. It is the caller + * responsibility to pass the right value. if <length> is set to 0 (or less that + * the smallest size to represent the chunk size), it is ignored. 
*/ -static void h1_prepend_chunk_size(struct buffer *buf, size_t chksz) +static void h1_prepend_chunk_size(struct buffer *buf, size_t chksz, size_t length) { char *beg, *end; beg = end = b_head(buf); *--beg = '\n'; *--beg = '\r'; + if (length) + length -= 2; do { *--beg = hextab[chksz & 0xF]; + if (length) + --length; } while (chksz >>= 4); + while (length) { + *--beg = '0'; + --length; + } buf->head -= (end - beg); b_add(buf, end - beg); } @@ -2328,15 +2576,47 @@ static size_t h1_make_eoh(struct h1s *h1s, struct h1m *h1m, struct htx *htx, siz b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf)); outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0); + /* Deal with removed "Content-Length" or "Transfer-Encoding" headers during analysis */ + if (((h1m->flags & H1_MF_CLEN) && !(h1s->flags & H1S_F_HAVE_CLEN))|| + ((h1m->flags & H1_MF_CHNK) && !(h1s->flags & H1S_F_HAVE_CHNK))) { + TRACE_STATE("\"Content-Length\" or \"Transfer-Encoding\" header removed during analysis", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + + if (h1s->flags & (H1S_F_HAVE_CLEN|H1S_F_HAVE_CHNK)) { + /* At least one header is present, we can continue */ + if (!(h1s->flags & H1S_F_HAVE_CLEN)) { + h1m->curr_len = h1m->body_len = 0; + h1m->flags &= ~H1_MF_CLEN; + } + else /* h1s->flags & H1S_F_HAVE_CHNK */ + h1m->flags &= ~(H1_MF_XFER_ENC|H1_MF_CHNK); + } + else { + /* Both headers are missing */ + if (h1m->flags & H1_MF_RESP) { + /* It is a response: Switch to unknown xfer length */ + h1m->flags &= ~(H1_MF_XFER_LEN|H1_MF_XFER_ENC|H1_MF_CLEN|H1_MF_CHNK); + h1s->flags &= ~(H1S_F_HAVE_CLEN|H1S_F_HAVE_CHNK); + TRACE_STATE("Switch response to unknown XFER length", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + } + else { + /* It is the request: Add "Content-Length: 0" header and skip payload */ + struct ist n = ist("content-length"); + if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV)) + h1_adjust_case_outgoing_hdr(h1s, h1m, &n); + if (!h1_format_htx_hdr(n, ist("0"), 
&outbuf)) + goto full; + + h1m->flags = (h1m->flags & ~(H1_MF_XFER_ENC|H1_MF_CHNK)) | H1_MF_CLEN; + h1s->flags = (h1s->flags & ~H1S_F_HAVE_CHNK) | (H1S_F_HAVE_CLEN|H1S_F_BODYLESS_REQ); + h1m->curr_len = h1m->body_len = 0; + TRACE_STATE("Set request content-length to 0 and skip payload", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + } + } + } + /* Deal with "Connection" header */ if (!(h1s->flags & H1S_F_HAVE_O_CONN)) { - if ((htx->flags & HTX_FL_PROXY_RESP) && h1s->req.state != H1_MSG_DONE) { - /* If the reply comes from haproxy while the request is - * not finished, we force the connection close. */ - h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO; - TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s); - } - else if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) { + if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) { /* T-E + C-L: force close */ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO; h1m->flags &= ~H1_MF_CLEN; @@ -2384,23 +2664,6 @@ static size_t h1_make_eoh(struct h1s *h1s, struct h1m *h1m, struct htx *htx, siz h1s->flags |= H1S_F_HAVE_CHNK; } - /* Deal with "Content-Length header */ - if ((h1m->flags & H1_MF_CLEN) && !(h1s->flags & H1S_F_HAVE_CLEN)) { - char *end; - - h1m->curr_len = h1m->body_len = htx->data + htx->extra - sz; - end = DISGUISE(ulltoa(h1m->body_len, trash.area, b_size(&trash))); - - n = ist("content-length"); - v = ist2(trash.area, end-trash.area); - if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV)) - h1_adjust_case_outgoing_hdr(h1s, h1m, &n); - if (!h1_format_htx_hdr(n, v, &outbuf)) - goto full; - TRACE_STATE("add \"Content-Length: <LEN>\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); - h1s->flags |= H1S_F_HAVE_CLEN; - } - /* Add the server name to a header (if requested) */ if (!(h1s->flags & H1S_F_HAVE_SRV_NAME) && !(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name)) { @@ 
-2555,7 +2818,8 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, * end-to-end. This is the situation that happens all the time with * large files. */ - if ((!(h1m->flags & H1_MF_RESP) || !(h1s->flags & H1S_F_BODYLESS_RESP)) && + if (((!(h1m->flags & H1_MF_RESP) && !(h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && !(h1s->flags & H1S_F_BODYLESS_RESP))) && !b_data(&h1c->obuf) && (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_CHNK) && (!h1m->curr_len || count == h1m->curr_len))) && htx_nbblks(htx) == 1 && @@ -2612,7 +2876,7 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, /* Because chunk meta-data are prepended, the chunk size of the current chunk * must be handled before the end of the previous chunk. */ - h1_prepend_chunk_size(&h1c->obuf, h1m->curr_len); + h1_prepend_chunk_size(&h1c->obuf, h1m->curr_len, 0); if (h1m->state == H1_MSG_CHUNK_CRLF) h1_prepend_chunk_crlf(&h1c->obuf); @@ -2682,8 +2946,9 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, last_data = 1; } - if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) { - TRACE_PROTO("Skip data for bodyless response", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx); + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + TRACE_PROTO("Skip data for bodyless message", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx); goto nextblk; } @@ -2754,7 +3019,8 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, } else if (type == HTX_BLK_EOT || type == HTX_BLK_TLR) { - if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) { + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { /* Do nothing the payload must be skipped * because it is a bodyless response 
*/ @@ -2954,7 +3220,9 @@ static size_t h1_make_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx if (sz > count) goto error; - if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) + if (!(h1m->flags & H1_MF_CHNK) || + (!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) goto nextblk; n = htx_get_blk_name(htx, blk); @@ -2967,7 +3235,9 @@ static size_t h1_make_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx goto full; } else if (type == HTX_BLK_EOT) { - if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + if (!(h1m->flags & H1_MF_CHNK) || + (!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request trailers skipped" : "H1 response trailers skipped"), H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s); } @@ -3023,8 +3293,7 @@ static size_t h1_make_chunk(struct h1s *h1s, struct h1m * h1m, size_t len) TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s); - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + if (!h1_get_obuf(h1c)) { TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s); goto end; } @@ -3077,8 +3346,7 @@ static size_t h1_process_mux(struct h1c *h1c, struct buffer *buf, size_t count) if (h1s->flags & (H1S_F_INTERNAL_ERROR|H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK)) goto end; - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + if (!h1_get_obuf(h1c)) { TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s); goto end; } @@ -3252,8 +3520,8 @@ static int h1_send_error(struct h1c *h1c) goto out; } - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= (H1C_F_OUT_ALLOC|H1C_F_ABRT_PENDING); + if (!h1_get_obuf(h1c)) { + 
h1c->flags |= H1C_F_ABRT_PENDING; TRACE_STATE("waiting for h1c obuf allocation", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn); goto out; } @@ -3291,6 +3559,11 @@ static int h1_handle_internal_err(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } session_inc_http_req_ctr(sess); proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1); _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[5]); @@ -3301,6 +3574,7 @@ static int h1_handle_internal_err(struct h1c *h1c) h1c->errcode = 500; ret = h1_send_error(h1c); sess_log(sess); + end: return ret; } @@ -3314,6 +3588,11 @@ static int h1_handle_parsing_error(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3347,6 +3626,11 @@ static int h1_handle_not_impl_err(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3377,6 +3661,11 @@ static int h1_handle_req_tout(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & 
PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3421,8 +3710,7 @@ static int h1_recv(struct h1c *h1c) return 1; } - if (!h1_get_buf(h1c, &h1c->ibuf)) { - h1c->flags |= H1C_F_IN_ALLOC; + if (!h1_get_ibuf(h1c)) { TRACE_STATE("waiting for h1c ibuf allocation", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn); return 0; } @@ -3594,7 +3882,7 @@ static int h1_process(struct h1c * h1c) /* Try to parse now the first block of a request, creating the H1 stream if necessary */ if (b_data(&h1c->ibuf) && /* Input data to be processed */ - (h1c->state < H1_CS_RUNNING) && /* IDLE, EMBRYONIC or UPGRADING */ + ((h1c->state < H1_CS_RUNNING) || (h1c->state == H1_CS_DRAINING)) && /* IDLE, EMBRYONIC, UPGRADING or DRAINING */ !(h1c->flags & (H1C_F_IN_SALLOC|H1C_F_ABRT_PENDING))) { /* No allocation failure on the stream rxbuf and no ERROR on the H1C */ struct h1s *h1s = h1c->h1s; struct buffer *buf; @@ -3605,7 +3893,8 @@ static int h1_process(struct h1c * h1c) goto release; /* First of all handle H1 to H2 upgrade (no need to create the H1 stream) */ - if (!(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */ + if (h1c->state != H1_CS_DRAINING && /* Not draining message */ + !(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */ !(h1c->px->options2 & PR_O2_NO_H2_UPGRADE) && /* H2 upgrade supported by the proxy */ !(conn->mux->flags & MX_FL_NO_UPG)) { /* the current mux supports upgrades */ /* Try to match H2 preface before parsing the request headers. 
*/ @@ -3635,9 +3924,8 @@ static int h1_process(struct h1c * h1c) h1s->sess->t_idle = ns_to_ms(now_ns - h1s->sess->accept_ts) - h1s->sess->t_handshake; /* Get the stream rxbuf */ - buf = h1_get_buf(h1c, &h1s->rxbuf); + buf = h1_get_rxbuf(h1s); if (!buf) { - h1c->flags |= H1C_F_IN_SALLOC; TRACE_STATE("waiting for stream rxbuf allocation", H1_EV_H1C_WAKE|H1_EV_H1C_BLK, h1c->conn); return 0; } @@ -3646,7 +3934,7 @@ static int h1_process(struct h1c * h1c) h1_process_demux(h1c, buf, count); h1_release_buf(h1c, &h1s->rxbuf); h1_set_idle_expiration(h1c); - if (h1c->state < H1_CS_RUNNING) { + if (h1c->state != H1_CS_RUNNING) { // TODO: be sure state cannot change in h1_process_demux if (h1s->flags & H1S_F_INTERNAL_ERROR) { h1_handle_internal_err(h1c); TRACE_ERROR("internal error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR); @@ -3689,6 +3977,11 @@ static int h1_process(struct h1c * h1c) if (h1_send_error(h1c)) h1_send(h1c); } + else if (h1c->state == H1_CS_DRAINING) { + BUG_ON(h1c->h1s->sd && !se_fl_test(h1c->h1s->sd, SE_FL_ORPHAN)); + h1s_destroy(h1c->h1s); + TRACE_STATE("abort/error when draining message. destroy h1s and close h1c", H1_EV_H1S_END, h1c->conn); + } else { h1_close(h1c); TRACE_STATE("close h1c", H1_EV_H1S_END, h1c->conn); @@ -3717,6 +4010,17 @@ static int h1_process(struct h1c * h1c) h1_alert(h1s); } } + else if (h1c->state == H1_CS_DRAINING) { + BUG_ON(!h1c->h1s); + if (se_fl_test(h1c->h1s->sd, SE_FL_EOI)) { + if (h1s_must_shut_conn(h1c->h1s)) { + h1_shutw_conn(conn); + goto release; + } + h1s_finish_detach(h1c->h1s); + goto end; + } + } if (!b_data(&h1c->ibuf)) h1_release_buf(h1c, &h1c->ibuf); @@ -4025,8 +4329,6 @@ static void h1_detach(struct sedesc *sd) { struct h1s *h1s = sd->se; struct h1c *h1c; - struct session *sess; - int is_not_first; TRACE_ENTER(H1_EV_STRM_END, h1s ? 
h1s->h1c->conn : NULL, h1s); @@ -4034,149 +4336,47 @@ static void h1_detach(struct sedesc *sd) TRACE_LEAVE(H1_EV_STRM_END); return; } - - sess = h1s->sess; h1c = h1s->h1c; - sess->accept_date = date; - sess->accept_ts = now_ns; - sess->t_handshake = 0; - sess->t_idle = -1; - - is_not_first = h1s->flags & H1S_F_NOT_FIRST; - h1s_destroy(h1s); - - if (h1c->state == H1_CS_IDLE && (h1c->flags & H1C_F_IS_BACK)) { - /* this connection may be killed at any moment, we want it to - * die "cleanly" (i.e. only an RST). + if (h1c->state == H1_CS_RUNNING && !(h1c->flags & H1C_F_IS_BACK) && h1s->req.state != H1_MSG_DONE) { + h1c->state = H1_CS_DRAINING; + TRACE_DEVEL("Deferring H1S destroy to drain message", H1_EV_STRM_END, h1s->h1c->conn, h1s); + /* If we have a pending data, process it immediately or + * subscribe for reads waiting for new data */ - h1c->flags |= H1C_F_SILENT_SHUT; - - /* If there are any excess server data in the input buffer, - * release it and close the connection ASAP (some data may - * remain in the output buffer). This happens if a server sends - * invalid responses. 
So in such case, we don't want to reuse - * the connection - */ - if (b_data(&h1c->ibuf)) { - h1_release_buf(h1c, &h1c->ibuf); - h1_close(h1c); - TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END); - goto release; - } - - if (h1c->conn->flags & CO_FL_PRIVATE) { - /* Add the connection in the session server list, if not already done */ - if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) { - h1c->conn->owner = NULL; - h1c->conn->mux->destroy(h1c); + if (unlikely(b_data(&h1c->ibuf))) { + if (h1_process(h1c) == -1) goto end; - } - /* Always idle at this step */ - if (session_check_idle_conn(sess, h1c->conn)) { - /* The connection got destroyed, let's leave */ - TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; - } } - else { - if (h1c->conn->owner == sess) - h1c->conn->owner = NULL; - - /* mark that the tasklet may lose its context to another thread and - * that the handler needs to check it under the idle conns lock. - */ - HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + else h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); - xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx); - - if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) { - /* The server doesn't want it, let's kill the connection right away */ - h1c->conn->mux->destroy(h1c); - TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; - } - /* At this point, the connection has been added to the - * server idle list, so another thread may already have - * hijacked it, so we can't do anything with it. 
- */ - return; - } - } - - release: - /* We don't want to close right now unless the connection is in error or shut down for writes */ - if ((h1c->flags & H1C_F_ERROR) || - (h1c->state == H1_CS_CLOSED) || - (h1c->state == H1_CS_CLOSING && !b_data(&h1c->obuf)) || - !h1c->conn->owner) { - TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn); - h1_release(h1c); - } - else { - if (h1c->state == H1_CS_IDLE) { - /* If we have a new request, process it immediately or - * subscribe for reads waiting for new data - */ - if (unlikely(b_data(&h1c->ibuf))) { - if (h1_process(h1c) == -1) - goto end; - } - else - h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); - } h1_set_idle_expiration(h1c); h1_refresh_timeout(h1c); } + else + h1s_finish_detach(h1s); + end: TRACE_LEAVE(H1_EV_STRM_END); } - -static void h1_shutr(struct stconn *sc, enum co_shr_mode mode) +static void h1_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct h1s *h1s = __sc_mux_strm(sc); struct h1c *h1c; - if (!h1s) - return; - h1c = h1s->h1c; - - TRACE_POINT(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode}); -} - -static void h1_shutw(struct stconn *sc, enum co_shw_mode mode) -{ - struct h1s *h1s = __sc_mux_strm(sc); - struct h1c *h1c; - - if (!h1s) + if (!h1s || !(mode & (SE_SHW_SILENT|SE_SHW_NORMAL))) return; h1c = h1s->h1c; TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode}); - if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) { - TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto do_shutw; - } - if (h1c->state == H1_CS_CLOSING || (h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR))) { - TRACE_STATE("shutdown on connection (EOS || CLOSING || ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto do_shutw; - } - - if (h1c->state == H1_CS_UPGRADING) { - TRACE_STATE("keep connection alive (UPGRADING)", H1_EV_STRM_SHUT, h1c->conn, h1s); + if (!h1s_must_shut_conn(h1s)) goto 
end; - } - if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) { - TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto end; - } do_shutw: h1_close(h1c); - if (mode != CO_SHW_NORMAL) + if (mode & SE_SHW_NORMAL) h1c->flags |= H1C_F_SILENT_SHUT; if (!b_data(&h1c->obuf)) @@ -4405,12 +4605,12 @@ static inline struct sedesc *h1s_opposite_sd(struct h1s *h1s) return sdo; } -static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct h1s *h1s = __sc_mux_strm(sc); struct h1c *h1c = h1s->h1c; struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req); - size_t ret = 0; + size_t sz, offset = 0, ret = 0; TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){count}); @@ -4420,21 +4620,55 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, goto out; } - /* TODO: add check on curr_len if CLEN */ + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + TRACE_STATE("Bodyless message, disable fastfwd", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } - if (h1m->flags & H1_MF_CHNK) { + if (h1m->flags & H1_MF_CLEN) { + if ((flags & NEGO_FF_FL_EXACT_SIZE) && count > h1m->curr_len) { + TRACE_ERROR("more payload than announced", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } + } + else if (h1m->flags & H1_MF_CHNK) { if (h1m->curr_len) { BUG_ON(h1m->state != H1_MSG_DATA); - if (count > h1m->curr_len) + if (count > h1m->curr_len) { + if ((flags & NEGO_FF_FL_EXACT_SIZE) && count > h1m->curr_len) { + TRACE_ERROR("chunk bigger than announced", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + 
h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } count = h1m->curr_len; + } } else { BUG_ON(h1m->state != H1_MSG_CHUNK_CRLF && h1m->state != H1_MSG_CHUNK_SIZE); - if (!h1_make_chunk(h1s, h1m, count)) { + if (flags & NEGO_FF_FL_EXACT_SIZE) { + if (!h1_make_chunk(h1s, h1m, count)) { h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; - goto out; + goto out; + } + h1m->curr_len = count; + } + else { + /* The producer does not know the chunk size, thus this will be emitted at the + * end, in done_ff(). So splicing cannot be used (see TODO below). + * We will reserve 10 bytes to handle at most 4Go chunk ! + * (<8-bytes SIZE><CRLF><CHUNK-DATA>) + */ + if (count > MAX_RANGE(unsigned int)) + count = MAX_RANGE(unsigned int); + offset = 10; + /* Add 2 more bytes to finish the previous chunk */ + if (h1m->state == H1_MSG_CHUNK_CRLF) + offset += 2; + goto no_splicing; } - h1m->curr_len = count; } } @@ -4445,7 +4679,7 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, * and then data in pipe, or the opposite. For now, it is not * supported to mix data. 
*/ - if (!b_data(input) && !b_data(&h1c->obuf) && may_splice) { + if (!b_data(input) && !b_data(&h1c->obuf) && (flags & NEGO_FF_FL_MAY_SPLICE)) { #if defined(USE_LINUX_SPLICE) if (h1c->conn->xprt->snd_pipe && (h1s->sd->iobuf.pipe || (pipes_used < global.maxpipes && (h1s->sd->iobuf.pipe = get_pipe())))) { h1s->sd->iobuf.offset = 0; @@ -4458,8 +4692,8 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, TRACE_DEVEL("Unable to allocate pipe for splicing, fallback to buffer", H1_EV_STRM_SEND, h1c->conn, h1s); } - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + no_splicing: + if (!h1_get_obuf(h1c)) { h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; TRACE_STATE("waiting for opposite h1c obuf allocation", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s); goto out; @@ -4468,21 +4702,22 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, if (b_space_wraps(&h1c->obuf)) b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf)); - h1s->sd->iobuf.buf = &h1c->obuf; - h1s->sd->iobuf.offset = 0; - h1s->sd->iobuf.data = 0; - - /* Cannot forward more than available room in output buffer */ - if (count > b_room(&h1c->obuf)) - count = b_room(&h1c->obuf); - - if (!count) { + if (b_contig_space(&h1c->obuf) <= offset) { h1c->flags |= H1C_F_OUT_FULL; h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; TRACE_STATE("output buffer full", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s); goto out; } + /* Cannot forward more than available room in output buffer */ + sz = b_contig_space(&h1c->obuf) - offset; + if (count > sz) + count = sz; + + h1s->sd->iobuf.buf = &h1c->obuf; + h1s->sd->iobuf.offset = offset; + h1s->sd->iobuf.data = 0; + /* forward remaining input data */ if (b_data(input)) { size_t xfer = count; @@ -4529,6 +4764,17 @@ static size_t h1_done_ff(struct stconn *sc) if (b_room(&h1c->obuf) == sd->iobuf.offset) h1c->flags |= H1C_F_OUT_FULL; + if (sd->iobuf.data && sd->iobuf.offset) { + struct buffer buf = 
b_make(b_orig(&h1c->obuf), b_size(&h1c->obuf), + b_peek_ofs(&h1c->obuf, b_data(&h1c->obuf) - sd->iobuf.data + sd->iobuf.offset), + sd->iobuf.data); + h1_prepend_chunk_size(&buf, sd->iobuf.data, sd->iobuf.offset - ((h1m->state == H1_MSG_CHUNK_CRLF) ? 2 : 0)); + if (h1m->state == H1_MSG_CHUNK_CRLF) + h1_prepend_chunk_crlf(&buf); + b_add(&h1c->obuf, sd->iobuf.offset); + h1m->state = H1_MSG_CHUNK_CRLF; + } + total = sd->iobuf.data; sd->iobuf.buf = NULL; sd->iobuf.offset = 0; @@ -4583,6 +4829,7 @@ static int h1_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res); struct sedesc *sdo = NULL; size_t total = 0, try = 0; + unsigned int nego_flags = NEGO_FF_FL_NONE; int ret = 0; TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count}); @@ -4612,10 +4859,15 @@ static int h1_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) retry: ret = 0; - if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN)) && count > h1m->curr_len) + if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN)) && count > h1m->curr_len) { + flags |= NEGO_FF_FL_EXACT_SIZE; count = h1m->curr_len; + } + + if (h1c->conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)) + nego_flags |= NEGO_FF_FL_MAY_SPLICE; - try = se_nego_ff(sdo, &h1c->ibuf, count, h1c->conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)); + try = se_nego_ff(sdo, &h1c->ibuf, count, nego_flags); if (b_room(&h1c->ibuf) && (h1c->flags & H1C_F_IN_FULL)) { h1c->flags &= ~H1C_F_IN_FULL; TRACE_STATE("h1c ibuf not full anymore", H1_EV_STRM_RECV|H1_EV_H1C_BLK); @@ -4848,6 +5100,10 @@ static int h1_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *outp if (!(h1c->wait_event.events & SUB_RETRY_RECV)) h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); return 0; + 
case MUX_CTL_GET_NBSTRM: + return h1_used_streams(conn); + case MUX_CTL_GET_MAXSTRM: + return 1; default: return -1; } @@ -5032,25 +5288,35 @@ static int add_hdr_case_adjust(const char *from, const char *to, char **err) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. */ -static int h1_takeover(struct connection *conn, int orig_tid) +static int h1_takeover(struct connection *conn, int orig_tid, int release) { struct h1c *h1c = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. 
+ */ + if (LIST_INLIST(&h1c->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -5077,8 +5343,10 @@ static int h1_takeover(struct connection *conn, int orig_tid) h1c->task = new_task; new_task = NULL; - h1c->task->process = h1_timeout_task; - h1c->task->context = h1c; + if (!release) { + h1c->task->process = h1_timeout_task; + h1c->task->context = h1c; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -5088,10 +5356,26 @@ static int h1_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid); h1c->wait_event.tasklet = new_tasklet; - h1c->wait_event.tasklet->process = h1_io_cb; - h1c->wait_event.tasklet->context = h1c; - h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, - SUB_RETRY_RECV, &h1c->wait_event); + if (!release) { + h1c->wait_event.tasklet->process = h1_io_cb; + h1c->wait_event.tasklet->context = h1c; + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, + SUB_RETRY_RECV, &h1c->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. 
+ */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&h1c->buf_wait.list)) + _b_dequeue(&h1c->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -5321,8 +5605,7 @@ static const struct mux_ops mux_http_ops = { .resume_fastfwd = h1_resume_fastfwd, .subscribe = h1_subscribe, .unsubscribe = h1_unsubscribe, - .shutr = h1_shutr, - .shutw = h1_shutw, + .shut = h1_shut, .show_fd = h1_show_fd, .show_sd = h1_show_sd, .ctl = h1_ctl, @@ -5349,8 +5632,7 @@ static const struct mux_ops mux_h1_ops = { .resume_fastfwd = h1_resume_fastfwd, .subscribe = h1_subscribe, .unsubscribe = h1_unsubscribe, - .shutr = h1_shutr, - .shutw = h1_shutw, + .shut = h1_shut, .show_fd = h1_show_fd, .show_sd = h1_show_sd, .ctl = h1_ctl, diff --git a/src/mux_h2.c b/src/mux_h2.c index 7ce0e6e..c28c5e1 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -306,7 +306,7 @@ enum { H2_STATS_COUNT /* must be the last member of the enum */ }; -static struct name_desc h2_stats[] = { +static struct stat_col h2_stats[] = { [H2_ST_HEADERS_RCVD] = { .name = "h2_headers_rcvd", .desc = "Total number of received HEADERS frames" }, [H2_ST_DATA_RCVD] = { .name = "h2_data_rcvd", @@ -355,25 +355,67 @@ static struct h2_counters { long long total_streams; /* total number of streams */ } h2_counters; -static void h2_fill_stats(void *data, struct field *stats) +static int h2_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct h2_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[H2_ST_HEADERS_RCVD] = mkf_u64(FN_COUNTER, counters->headers_rcvd); - stats[H2_ST_DATA_RCVD] = mkf_u64(FN_COUNTER, counters->data_rcvd); - stats[H2_ST_SETTINGS_RCVD] = mkf_u64(FN_COUNTER, counters->settings_rcvd); - stats[H2_ST_RST_STREAM_RCVD] = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd); - stats[H2_ST_GOAWAY_RCVD] = mkf_u64(FN_COUNTER, counters->goaway_rcvd); - - stats[H2_ST_CONN_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->conn_proto_err); - stats[H2_ST_STRM_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->strm_proto_err); - stats[H2_ST_RST_STREAM_RESP] = mkf_u64(FN_COUNTER, counters->rst_stream_resp); - stats[H2_ST_GOAWAY_RESP] = mkf_u64(FN_COUNTER, counters->goaway_resp); - - stats[H2_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns); - stats[H2_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams); - stats[H2_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns); - stats[H2_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams); + for (; current_field < H2_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case H2_ST_HEADERS_RCVD: + metric = mkf_u64(FN_COUNTER, counters->headers_rcvd); + break; + case H2_ST_DATA_RCVD: + metric = mkf_u64(FN_COUNTER, counters->data_rcvd); + break; + case H2_ST_SETTINGS_RCVD: + metric = mkf_u64(FN_COUNTER, counters->settings_rcvd); + break; + case H2_ST_RST_STREAM_RCVD: + metric = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd); + break; + case H2_ST_GOAWAY_RCVD: + metric = mkf_u64(FN_COUNTER, counters->goaway_rcvd); + break; + case H2_ST_CONN_PROTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->conn_proto_err); + break; + case H2_ST_STRM_PROTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->strm_proto_err); + break; + case H2_ST_RST_STREAM_RESP: + metric = mkf_u64(FN_COUNTER, counters->rst_stream_resp); + break; + case H2_ST_GOAWAY_RESP: + metric = mkf_u64(FN_COUNTER, counters->goaway_resp); + break; + case H2_ST_OPEN_CONN: + metric = 
mkf_u64(FN_GAUGE, counters->open_conns); + break; + case H2_ST_OPEN_STREAM: + metric = mkf_u64(FN_GAUGE, counters->open_streams); + break; + case H2_ST_TOTAL_CONN: + metric = mkf_u64(FN_COUNTER, counters->total_conns); + break; + case H2_ST_TOTAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->total_streams); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module h2_stats_module = { @@ -770,13 +812,13 @@ static int h2_buf_available(void *target) struct h2c *h2c = target; struct h2s *h2s; - if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf)) { + if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf, DB_MUX_RX)) { h2c->flags &= ~H2_CF_DEM_DALLOC; h2c_restart_reading(h2c, 1); return 1; } - if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf))) { + if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf), DB_MUX_TX)) { h2c->flags &= ~H2_CF_MUX_MALLOC; if (h2c->flags & H2_CF_DEM_MROOM) { @@ -788,7 +830,7 @@ static int h2_buf_available(void *target) if ((h2c->flags & H2_CF_DEM_SALLOC) && (h2s = h2c_st_by_id(h2c, h2c->dsi)) && h2s_sc(h2s) && - b_alloc(&h2s->rxbuf)) { + b_alloc(&h2s->rxbuf, DB_SE_RX)) { h2c->flags &= ~H2_CF_DEM_SALLOC; h2c_restart_reading(h2c, 1); return 1; @@ -802,10 +844,8 @@ static inline struct buffer *h2_get_buf(struct h2c *h2c, struct buffer *bptr) struct buffer *buf = NULL; if (likely(!LIST_INLIST(&h2c->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - h2c->buf_wait.target = h2c; - h2c->buf_wait.wakeup_cb = h2_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &h2c->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_MUX_RX)) == NULL)) { + b_queue(DB_MUX_RX, &h2c->buf_wait, h2c, h2_buf_available); } return buf; } @@ -1153,8 +1193,7 @@ static void 
h2_release(struct h2c *h2c) hpack_dht_free(h2c->ddht); - if (LIST_INLIST(&h2c->buf_wait.list)) - LIST_DEL_INIT(&h2c->buf_wait.list); + b_dequeue(&h2c->buf_wait); h2_release_buf(h2c, &h2c->dbuf); h2_release_mbuf(h2c); @@ -1222,6 +1261,20 @@ static inline int h2s_mws(const struct h2s *h2s) return h2s->sws + h2s->h2c->miw; } +/* Returns 1 if the H2 error of the opposite side is forwardable to the peer. + * Otherwise 0 is returned. + * For now, only CANCEL from the client is forwardable to the server. + */ +static inline int h2s_is_forwardable_abort(struct h2s *h2s, struct se_abort_info *reason) +{ + enum h2_err err = H2_ERR_NO_ERROR; + + if (reason && ((reason->info & SE_ABRT_SRC_MASK) >> SE_ABRT_SRC_SHIFT) == SE_ABRT_SRC_MUX_H2) + err = reason->code; + + return ((h2s->h2c->flags & H2_CF_IS_BACK) && (err == H2_ERR_CANCEL)); +} + /* marks an error on the connection. Before settings are sent, we must not send * a GOAWAY frame, and the error state will prevent h2c_send_goaway_error() * from verifying this so we set H2_CF_GOAWAY_FAILED to make sure it will not @@ -2770,6 +2823,10 @@ static int h2c_handle_rst_stream(struct h2c *h2c, struct h2s *h2s) if (h2s_sc(h2s)) { se_fl_set_error(h2s->sd); + if (!h2s->sd->abort_info.info) { + h2s->sd->abort_info.info = (SE_ABRT_SRC_MUX_H2 << SE_ABRT_SRC_SHIFT); + h2s->sd->abort_info.code = h2s->errcode; + } h2s_alert(h2s); } @@ -4344,8 +4401,13 @@ static int h2_process(struct h2c *h2c) if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY) && (b_data(&h2c->dbuf) || (h2c->flags & H2_CF_RCVD_SHUT))) { + int prev_glitches = h2c->glitches; + h2_process_demux(h2c); + if (h2c->glitches != prev_glitches && !(h2c->flags & H2_CF_IS_BACK)) + session_add_glitch_ctr(h2c->conn->owner, h2c->glitches - prev_glitches); + if (h2c->st0 >= H2_CS_ERROR || (h2c->flags & H2_CF_ERROR)) b_reset(&h2c->dbuf); @@ -4664,6 +4726,12 @@ static int h2_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *outp case MUX_CTL_GET_GLITCHES: return h2c->glitches; + case 
MUX_CTL_GET_NBSTRM: + return h2c->nb_streams; + + case MUX_CTL_GET_MAXSTRM: + return h2c->streams_limit; + default: return -1; } @@ -4772,6 +4840,10 @@ static void h2_detach(struct sedesc *sd) } } if (eb_is_empty(&h2c->streams_by_id)) { + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1); if (session_check_idle_conn(h2c->conn->owner, h2c->conn) != 0) { /* At this point either the connection is destroyed, or it's been added to the server idle list, just stop */ TRACE_DEVEL("leaving without reusable idle connection", H2_EV_STRM_END); @@ -4811,7 +4883,7 @@ static void h2_detach(struct sedesc *sd) } else if (!h2c->conn->hash_node->node.node.leaf_p && h2_avail_streams(h2c->conn) > 0 && objt_server(h2c->conn->target) && - !LIST_INLIST(&h2c->conn->session_list)) { + !LIST_INLIST(&h2c->conn->sess_el)) { srv_add_to_avail_list(__objt_server(h2c->conn->target), h2c->conn); } } @@ -4837,7 +4909,7 @@ static void h2_detach(struct sedesc *sd) } /* Performs a synchronous or asynchronous shutr(). */ -static void h2_do_shutr(struct h2s *h2s) +static void h2_do_shutr(struct h2s *h2s, struct se_abort_info *reason) { struct h2c *h2c = h2s->h2c; @@ -4860,6 +4932,10 @@ static void h2_do_shutr(struct h2s *h2s) h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM); h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM); } + else if (h2s_is_forwardable_abort(h2s, reason)) { + TRACE_STATE("shutr using opposite endp code", H2_EV_STRM_SHUT, h2c->conn, h2s); + h2s_error(h2s, reason->code); + } else if (!(h2s->flags & H2_SF_HEADERS_SENT)) { /* Nothing was never sent for this stream, so reset with * REFUSED_STREAM error to let the client retry the @@ -4905,8 +4981,9 @@ add_to_list: return; } + /* Performs a synchronous or asynchronous shutw(). 
*/ -static void h2_do_shutw(struct h2s *h2s) +static void h2_do_shutw(struct h2s *h2s, struct se_abort_info *reason) { struct h2c *h2c = h2s->h2c; @@ -4916,6 +4993,7 @@ static void h2_do_shutw(struct h2s *h2s) TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s); if (h2s->st != H2_SS_ERROR && + !h2s_is_forwardable_abort(h2s, reason) && (h2s->flags & (H2_SF_HEADERS_SENT | H2_SF_MORE_HTX_DATA)) == H2_SF_HEADERS_SENT) { /* we can cleanly close using an empty data frame only after headers * and if no more data is expected to be sent. @@ -4940,6 +5018,10 @@ static void h2_do_shutw(struct h2s *h2s) h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM); h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM); } + else if (h2s_is_forwardable_abort(h2s, reason)) { + TRACE_STATE("shutw using opposite endp code", H2_EV_STRM_SHUT, h2c->conn, h2s); + h2s_error(h2s, reason->code); + } else if (h2s->flags & H2_SF_MORE_HTX_DATA) { /* some unsent data were pending (e.g. abort during an upload), * let's send a CANCEL. @@ -5006,10 +5088,10 @@ struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state) } if (h2s->flags & H2_SF_WANT_SHUTW) - h2_do_shutw(h2s); + h2_do_shutw(h2s, NULL); if (h2s->flags & H2_SF_WANT_SHUTR) - h2_do_shutr(h2s); + h2_do_shutr(h2s, NULL); if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) { /* We're done trying to send, remove ourself from the send_list */ @@ -5028,24 +5110,17 @@ struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state) return t; } -/* shutr() called by the stream connector (mux_ops.shutr) */ -static void h2_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct h2s *h2s = __sc_mux_strm(sc); - - TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); - if (mode) - h2_do_shutr(h2s); - TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); -} - -/* shutw() called by the stream connector (mux_ops.shutw) */ -static void h2_shutw(struct stconn *sc, enum co_shw_mode mode) +static void h2_shut(struct stconn *sc, enum se_shut_mode mode, struct 
se_abort_info *reason) { struct h2s *h2s = __sc_mux_strm(sc); TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); - h2_do_shutw(h2s); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) { + /* Pass the reason for silent shutw only (abort) */ + h2_do_shutw(h2s, (mode & SE_SHW_SILENT) ? reason : NULL); + } + if (mode & SE_SHR_RESET) + h2_do_shutr(h2s, reason); TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); } @@ -6197,10 +6272,9 @@ static size_t h2s_snd_bhdrs(struct h2s *h2s, struct htx *htx) } /* Try to send a DATA frame matching HTTP response present in HTX structure - * present in <buf>, for stream <h2s>. Returns the number of bytes sent. The - * caller must check the stream's status to detect any error which might have - * happened subsequently to a successful send. Returns the number of data bytes - * consumed, or zero if nothing done. + * present in <buf>, for stream <h2s>. The caller must check the stream's status + * to detect any error which might have happened subsequently to a successful + * send. Returns the number of data bytes consumed, or zero if nothing done. */ static size_t h2s_make_data(struct h2s *h2s, struct buffer *buf, size_t count) { @@ -7095,7 +7169,7 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in return total; } -static size_t h2_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t h2_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct h2s *h2s = __sc_mux_strm(sc); struct h2c *h2c = h2s->h2c; @@ -7454,25 +7528,35 @@ static int h2_show_sd(struct buffer *msg, struct sedesc *sd, const char *pfx) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. 
*/ -static int h2_takeover(struct connection *conn, int orig_tid) +static int h2_takeover(struct connection *conn, int orig_tid, int release) { struct h2c *h2c = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. + */ + if (LIST_INLIST(&h2c->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -7499,8 +7583,10 @@ static int h2_takeover(struct connection *conn, int orig_tid) h2c->task = new_task; new_task = NULL; - h2c->task->process = h2_timeout_task; - h2c->task->context = h2c; + if (!release) { + h2c->task->process = h2_timeout_task; + h2c->task->context = h2c; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -7510,10 +7596,26 @@ static int h2_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid); h2c->wait_event.tasklet = new_tasklet; - h2c->wait_event.tasklet->process = h2_io_cb; - 
h2c->wait_event.tasklet->context = h2c; - h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx, - SUB_RETRY_RECV, &h2c->wait_event); + if (!release) { + h2c->wait_event.tasklet->process = h2_io_cb; + h2c->wait_event.tasklet->context = h2c; + h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx, + SUB_RETRY_RECV, &h2c->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. + */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&h2c->buf_wait.list)) + _b_dequeue(&h2c->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -7690,8 +7792,7 @@ static const struct mux_ops h2_ops = { .destroy = h2_destroy, .avail_streams = h2_avail_streams, .used_streams = h2_used_streams, - .shutr = h2_shutr, - .shutw = h2_shutw, + .shut = h2_shut, .ctl = h2_ctl, .sctl = h2_sctl, .show_fd = h2_show_fd, diff --git a/src/mux_pt.c b/src/mux_pt.c index 3cca6a1..6dbbe04 100644 --- a/src/mux_pt.c +++ b/src/mux_pt.c @@ -462,39 +462,30 @@ static int mux_pt_avail_streams(struct connection *conn) return 1 - mux_pt_used_streams(conn); } -static void mux_pt_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct connection *conn = __sc_conn(sc); - struct mux_pt_ctx *ctx = conn->ctx; - - TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc); - - se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); - if (conn_xprt_ready(conn) && conn->xprt->shutr) - conn->xprt->shutr(conn, conn->xprt_ctx, - (mode == CO_SHR_DRAIN)); - else if (mode == CO_SHR_DRAIN) - conn_ctrl_drain(conn); - if (se_fl_test(ctx->sd, SE_FL_SHW)) - conn_full_close(conn); - - TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc); -} - -static void 
mux_pt_shutw(struct stconn *sc, enum co_shw_mode mode) +static void mux_pt_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct connection *conn = __sc_conn(sc); struct mux_pt_ctx *ctx = conn->ctx; TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) { + if (conn_xprt_ready(conn) && conn->xprt->shutw) + conn->xprt->shutw(conn, conn->xprt_ctx, (mode & SE_SHW_NORMAL)); + if (conn->flags & CO_FL_SOCK_RD_SH) + conn_full_close(conn); + else + conn_sock_shutw(conn, (mode & SE_SHW_NORMAL)); + } - if (conn_xprt_ready(conn) && conn->xprt->shutw) - conn->xprt->shutw(conn, conn->xprt_ctx, - (mode == CO_SHW_NORMAL)); - if (!se_fl_test(ctx->sd, SE_FL_SHR)) - conn_sock_shutw(conn, (mode == CO_SHW_NORMAL)); - else - conn_full_close(conn); + if (mode & (SE_SHR_RESET|SE_SHR_DRAIN)) { + se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + if (conn_xprt_ready(conn) && conn->xprt->shutr) + conn->xprt->shutr(conn, conn->xprt_ctx, (mode & SE_SHR_DRAIN)); + else if (mode & SE_SHR_DRAIN) + conn_ctrl_drain(conn); + if (conn->flags & CO_FL_SOCK_WR_SH) + conn_full_close(conn); + } TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc); } @@ -582,7 +573,7 @@ static inline struct sedesc *mux_pt_opposite_sd(struct mux_pt_ctx *ctx) return sdo; } -static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct connection *conn = __sc_conn(sc); struct mux_pt_ctx *ctx = conn->ctx; @@ -597,7 +588,7 @@ static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t cou * and then data in pipe, or the opposite. For now, it is not * supported to mix data. 
*/ - if (!b_data(input) && may_splice) { + if (!b_data(input) && (flags & NEGO_FF_FL_MAY_SPLICE)) { if (conn->xprt->snd_pipe && (ctx->sd->iobuf.pipe || (pipes_used < global.maxpipes && (ctx->sd->iobuf.pipe = get_pipe())))) { ctx->sd->iobuf.offset = 0; ctx->sd->iobuf.data = 0; @@ -653,6 +644,7 @@ static int mux_pt_fastfwd(struct stconn *sc, unsigned int count, unsigned int fl struct mux_pt_ctx *ctx = conn->ctx; struct sedesc *sdo = NULL; size_t total = 0, try = 0; + unsigned int nego_flags = NEGO_FF_FL_NONE; int ret = 0; TRACE_ENTER(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){count}); @@ -665,7 +657,10 @@ static int mux_pt_fastfwd(struct stconn *sc, unsigned int count, unsigned int fl goto out; } - try = se_nego_ff(sdo, &BUF_NULL, count, conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)); + if (conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)) + nego_flags |= NEGO_FF_FL_MAY_SPLICE; + + try = se_nego_ff(sdo, &BUF_NULL, count, nego_flags); if (sdo->iobuf.flags & IOBUF_FL_NO_FF) { /* Fast forwarding is not supported by the consumer */ se_fl_clr(ctx->sd, SE_FL_MAY_FASTFWD_PROD); @@ -786,6 +781,7 @@ static int mux_pt_unsubscribe(struct stconn *sc, int event_type, struct wait_eve static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output) { int ret = 0; + switch (mux_ctl) { case MUX_CTL_STATUS: if (!(conn->flags & CO_FL_WAIT_XPRT)) @@ -793,6 +789,10 @@ static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void * return ret; case MUX_CTL_EXIT_STATUS: return MUX_ES_UNKNOWN; + case MUX_CTL_GET_NBSTRM: + return mux_pt_used_streams(conn); + case MUX_CTL_GET_MAXSTRM: + return 1; default: return -1; } @@ -862,8 +862,7 @@ const struct mux_ops mux_tcp_ops = { .destroy = mux_pt_destroy_meth, .ctl = mux_pt_ctl, .sctl = mux_pt_sctl, - .shutr = mux_pt_shutr, - .shutw = mux_pt_shutw, + .shut = mux_pt_shut, .flags = MX_FL_NONE, .name = "PASS", 
}; @@ -888,8 +887,7 @@ const struct mux_ops mux_pt_ops = { .destroy = mux_pt_destroy_meth, .ctl = mux_pt_ctl, .sctl = mux_pt_sctl, - .shutr = mux_pt_shutr, - .shutw = mux_pt_shutw, + .shut = mux_pt_shut, .flags = MX_FL_NONE|MX_FL_NO_UPG, .name = "PASS", }; diff --git a/src/mux_quic.c b/src/mux_quic.c index 05c92fa..ae504ee 100644 --- a/src/mux_quic.c +++ b/src/mux_quic.c @@ -3,6 +3,7 @@ #include <import/eb64tree.h> #include <haproxy/api.h> +#include <haproxy/chunk.h> #include <haproxy/connection.h> #include <haproxy/dynbuf.h> #include <haproxy/h3.h> @@ -13,6 +14,7 @@ #include <haproxy/qmux_http.h> #include <haproxy/qmux_trace.h> #include <haproxy/quic_conn.h> +#include <haproxy/quic_fctl.h> #include <haproxy/quic_frame.h> #include <haproxy/quic_sock.h> #include <haproxy/quic_stream.h> @@ -58,6 +60,8 @@ static void qcs_free(struct qcs *qcs) /* Safe to use even if already removed from the list. */ LIST_DEL_INIT(&qcs->el_opening); LIST_DEL_INIT(&qcs->el_send); + LIST_DEL_INIT(&qcs->el_fctl); + LIST_DEL_INIT(&qcs->el_buf); /* Release stream endpoint descriptor. */ BUG_ON(qcs->sd && !se_fl_test(qcs->sd, SE_FL_ORPHAN)); @@ -68,11 +72,10 @@ static void qcs_free(struct qcs *qcs) qcc->app_ops->detach(qcs); /* Release qc_stream_desc buffer from quic-conn layer. */ - qc_stream_desc_release(qcs->stream, qcs->tx.sent_offset); + qc_stream_desc_release(qcs->stream, qcs->tx.fc.off_real); - /* Free Rx/Tx buffers. */ + /* Free Rx buffer. */ qcs_free_ncbuf(qcs, &qcs->rx.ncbuf); - b_free(&qcs->tx.buf); /* Remove qcs from qcc tree. 
*/ eb64_delete(&qcs->by_id); @@ -97,34 +100,45 @@ static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type) qcs->stream = NULL; qcs->qcc = qcc; - qcs->sd = NULL; qcs->flags = QC_SF_NONE; qcs->st = QC_SS_IDLE; qcs->ctx = NULL; + qcs->sd = sedesc_new(); + if (!qcs->sd) + goto err; + qcs->sd->se = qcs; + qcs->sd->conn = qcc->conn; + se_fl_set(qcs->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST); + se_expect_no_data(qcs->sd); + + if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_QUIC_SND)) + se_fl_set(qcs->sd, SE_FL_MAY_FASTFWD_CONS); + /* App callback attach may register the stream for http-request wait. * These fields must be initialed before. */ LIST_INIT(&qcs->el_opening); LIST_INIT(&qcs->el_send); + LIST_INIT(&qcs->el_fctl); + LIST_INIT(&qcs->el_buf); qcs->start = TICK_ETERNITY; /* store transport layer stream descriptor in qcc tree */ qcs->id = qcs->by_id.key = id; eb64_insert(&qcc->streams_by_id, &qcs->by_id); - /* If stream is local, use peer remote-limit, or else the opposite. */ + /* Different limits can be set by the peer for local and remote bidi streams. */ if (quic_stream_is_bidi(id)) { - qcs->tx.msd = quic_stream_is_local(qcc, id) ? qcc->rfctl.msd_bidi_r : - qcc->rfctl.msd_bidi_l; + qfctl_init(&qcs->tx.fc, quic_stream_is_local(qcc, id) ? + qcc->rfctl.msd_bidi_r : qcc->rfctl.msd_bidi_l); } else if (quic_stream_is_local(qcc, id)) { - qcs->tx.msd = qcc->rfctl.msd_uni_l; + qfctl_init(&qcs->tx.fc, qcc->rfctl.msd_uni_l); + } + else { + qfctl_init(&qcs->tx.fc, 0); } - - /* Properly set flow-control blocking if initial MSD is nul. 
*/ - if (!qcs->tx.msd) - qcs->flags |= QC_SF_BLK_SFCTL; qcs->rx.ncbuf = NCBUF_NULL; qcs->rx.app_buf = BUF_NULL; @@ -139,10 +153,6 @@ static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type) } qcs->rx.msd_init = qcs->rx.msd; - qcs->tx.buf = BUF_NULL; - qcs->tx.offset = 0; - qcs->tx.sent_offset = 0; - qcs->wait_event.tasklet = NULL; qcs->wait_event.events = 0; qcs->subs = NULL; @@ -423,15 +433,6 @@ int qcs_is_close_remote(struct qcs *qcs) return qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO; } -/* Allocate if needed buffer <bptr> for stream <qcs>. - * - * Returns the buffer instance or NULL on allocation failure. - */ -struct buffer *qcs_get_buf(struct qcs *qcs, struct buffer *bptr) -{ - return b_alloc(bptr); -} - /* Allocate if needed buffer <ncbuf> for stream <qcs>. * * Returns the buffer instance or NULL on allocation failure. @@ -441,7 +442,7 @@ static struct ncbuf *qcs_get_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf) struct buffer buf = BUF_NULL; if (ncb_is_null(ncbuf)) { - if (!b_alloc(&buf)) + if (!b_alloc(&buf, DB_MUX_RX)) return NULL; *ncbuf = ncb_make(buf.area, buf.size, 0); @@ -511,6 +512,35 @@ void qcs_notify_send(struct qcs *qcs) } } +/* Notify on a new stream-desc buffer available for <qcc> connection. + * + * Returns true if a stream was woken up. If false is returned, this indicates + * to the caller that it's currently unnecessary to notify for the rest of the + * available buffers. 
+ */ +int qcc_notify_buf(struct qcc *qcc) +{ + struct qcs *qcs; + int ret = 0; + + TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn); + + if (qcc->flags & QC_CF_CONN_FULL) { + TRACE_STATE("new stream desc buffer available", QMUX_EV_QCC_WAKE, qcc->conn); + qcc->flags &= ~QC_CF_CONN_FULL; + } + + if (!LIST_ISEMPTY(&qcc->buf_wait_list)) { + qcs = LIST_ELEM(qcc->buf_wait_list.n, struct qcs *, el_buf); + LIST_DEL_INIT(&qcs->el_buf); + qcs_notify_send(qcs); + ret = 1; + } + + TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn); + return ret; +} + /* A fatal error is detected locally for <qcc> connection. It should be closed * with a CONNECTION_CLOSE using <err> code. Set <app> to true to indicate that * the code must be considered as an application level error. This function @@ -536,6 +566,28 @@ void qcc_set_error(struct qcc *qcc, int err, int app) tasklet_wakeup(qcc->wait_event.tasklet); } +/* Increment glitch counter for <qcc> connection by <inc> steps. If configured + * threshold reached, close the connection with an error code. + */ +int qcc_report_glitch(struct qcc *qcc, int inc) +{ + const int max = global.tune.quic_frontend_glitches_threshold; + + qcc->glitches += inc; + if (max && qcc->glitches >= max && !(qcc->flags & QC_CF_ERRL)) { + if (qcc->app_ops->report_susp) { + qcc->app_ops->report_susp(qcc->ctx); + qcc_set_error(qcc, qcc->err.code, 1); + } + else { + qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0); + } + return 1; + } + + return 0; +} + /* Open a locally initiated stream for the connection <qcc>. Set <bidi> for a * bidirectional stream, else an unidirectional stream is opened. The next * available ID on the connection will be used according to the stream type. 
@@ -650,17 +702,6 @@ struct stconn *qcs_attach_sc(struct qcs *qcs, struct buffer *buf, char fin) struct qcc *qcc = qcs->qcc; struct session *sess = qcc->conn->owner; - qcs->sd = sedesc_new(); - if (!qcs->sd) - return NULL; - - qcs->sd->se = qcs; - qcs->sd->conn = qcc->conn; - se_fl_set(qcs->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST); - se_expect_no_data(qcs->sd); - - if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_QUIC_SND)) - se_fl_set(qcs->sd, SE_FL_MAY_FASTFWD_CONS); /* TODO duplicated from mux_h2 */ sess->t_idle = ns_to_ms(now_ns - sess->accept_ts) - sess->t_handshake; @@ -899,7 +940,7 @@ static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs) fin = 1; if (!(qcs->flags & QC_SF_READ_ABORTED)) { - ret = qcc->app_ops->decode_qcs(qcs, &b, fin); + ret = qcc->app_ops->rcv_buf(qcs, &b, fin); if (ret < 0) { TRACE_ERROR("decoding error", QMUX_EV_QCS_RECV, qcc->conn, qcs); goto err; @@ -930,25 +971,170 @@ static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs) return 1; } +/* Allocate if needed and retrieve <qcs> stream buffer for data reception. + * + * Returns buffer pointer. May be NULL on allocation failure. + */ +struct buffer *qcc_get_stream_rxbuf(struct qcs *qcs) +{ + return b_alloc(&qcs->rx.app_buf, DB_MUX_RX); +} + +/* Allocate if needed and retrieve <qcs> stream buffer for data emission. + * + * <err> is an output argument which is useful to differentiate the failure + * cause when the buffer cannot be allocated. It is set to 0 if the connection + * buffer limit is reached. For fatal errors, its value is non-zero. + * + * Returns buffer pointer. May be NULL on allocation failure, in which case + * <err> will refer to the cause. + */ +struct buffer *qcc_get_stream_txbuf(struct qcs *qcs, int *err) +{ + struct qcc *qcc = qcs->qcc; + int buf_avail; + struct buffer *out = qc_stream_buf_get(qcs->stream); + + /* Stream must not try to reallocate a buffer if currently waiting for one. 
*/ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + *err = 0; + + if (!out) { + if (qcc->flags & QC_CF_CONN_FULL) { + LIST_APPEND(&qcc->buf_wait_list, &qcs->el_buf); + goto out; + } + + out = qc_stream_buf_alloc(qcs->stream, qcs->tx.fc.off_real, + &buf_avail); + if (!out) { + if (buf_avail) { + TRACE_ERROR("stream desc alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); + *err = 1; + goto out; + } + + TRACE_STATE("hitting stream desc buffer limit", QMUX_EV_QCS_SEND, qcc->conn, qcs); + LIST_APPEND(&qcc->buf_wait_list, &qcs->el_buf); + qcc->flags |= QC_CF_CONN_FULL; + goto out; + } + + if (!b_alloc(out, DB_MUX_TX)) { + TRACE_ERROR("buffer alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); + *err = 1; + goto out; + } + } + + out: + return out; +} + +/* Returns total number of bytes not already sent to quic-conn layer. */ +static uint64_t qcs_prep_bytes(const struct qcs *qcs) +{ + struct buffer *out = qc_stream_buf_get(qcs->stream); + uint64_t diff, base_off; + + if (!out) + return 0; + + /* if ack_offset < buf_offset, it points to an older buffer. */ + base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset); + diff = qcs->tx.fc.off_real - base_off; + return b_data(out) - diff; +} + +/* Try to realign <out> buffer for <qcs> stream. This is done only if there is + * no data waiting for ACK. + * + * Returns 0 if realign was performed else non-zero. + */ +int qcc_realign_stream_txbuf(const struct qcs *qcs, struct buffer *out) +{ + if (qcs_prep_bytes(qcs) == b_data(out)) { + b_slow_realign(out, trash.area, b_data(out)); + return 0; + } + + return 1; +} + +/* Release the current <qcs> Tx buffer. This is useful if space left is not + * enough anymore. A new instance can then be allocated to continue sending. + * + * This operation fails if there is not yet sent bytes in the buffer. In this + * case, stream layer should interrupt sending until further notification. 
+ * + * Returns 0 if buffer is released and a new one can be allocated or non-zero + * if there is still remaining data. + */ +int qcc_release_stream_txbuf(struct qcs *qcs) +{ + const uint64_t bytes = qcs_prep_bytes(qcs); + + /* Cannot release buffer if prepared data is not fully sent. */ + if (bytes) { + qcs->flags |= QC_SF_BLK_MROOM; + return 1; + } + + qc_stream_buf_release(qcs->stream); + return 0; +} + +/* Returns true if stream layer can proceed to emission via <qcs>. */ +int qcc_stream_can_send(const struct qcs *qcs) +{ + return !(qcs->flags & QC_SF_BLK_MROOM) && !LIST_INLIST(&qcs->el_buf); +} + +/* Wakes up every streams of <qcc> which are currently waiting for sending but + * are blocked on connection flow control. + */ +static void qcc_notify_fctl(struct qcc *qcc) +{ + struct qcs *qcs; + + while (!LIST_ISEMPTY(&qcc->fctl_list)) { + qcs = LIST_ELEM(qcc->fctl_list.n, struct qcs *, el_fctl); + LIST_DEL_INIT(&qcs->el_fctl); + qcs_notify_send(qcs); + } +} + /* Prepare for the emission of RESET_STREAM on <qcs> with error code <err>. */ void qcc_reset_stream(struct qcs *qcs, int err) { struct qcc *qcc = qcs->qcc; + const uint64_t diff = qcs_prep_bytes(qcs); if ((qcs->flags & QC_SF_TO_RESET) || qcs_is_close_local(qcs)) return; + /* TODO if QCS waiting for buffer, it could be removed from + * <qcc.buf_wait_list> if sending is closed now. + */ + TRACE_STATE("reset stream", QMUX_EV_QCS_END, qcc->conn, qcs); qcs->flags |= QC_SF_TO_RESET; qcs->err = err; - /* Remove prepared stream data from connection flow-control calcul. */ - if (qcs->tx.offset > qcs->tx.sent_offset) { - const uint64_t diff = qcs->tx.offset - qcs->tx.sent_offset; - BUG_ON(qcc->tx.offsets - diff < qcc->tx.sent_offsets); - qcc->tx.offsets -= diff; - /* Reset qcs offset to prevent BUG_ON() on qcs_destroy(). */ - qcs->tx.offset = qcs->tx.sent_offset; + if (diff) { + const int soft_blocked = qfctl_sblocked(&qcc->tx.fc); + + /* Soft offset cannot be inferior to real one. 
*/ + BUG_ON(qcc->tx.fc.off_soft - diff < qcc->tx.fc.off_real); + + /* Subtract to conn flow control data amount prepared on stream not yet sent. */ + qcc->tx.fc.off_soft -= diff; + if (soft_blocked && !qfctl_sblocked(&qcc->tx.fc)) + qcc_notify_fctl(qcc); + + /* Reset QCS soft off to prevent BUG_ON() on qcs_destroy(). */ + qcs->tx.fc.off_soft = qcs->tx.fc.off_real; } /* Report send error to stream-endpoint layer. */ @@ -957,15 +1143,16 @@ void qcc_reset_stream(struct qcs *qcs, int err) qcs_alert(qcs); } - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, 0); tasklet_wakeup(qcc->wait_event.tasklet); } /* Register <qcs> stream for emission of STREAM, STOP_SENDING or RESET_STREAM. * Set <urg> to 1 if stream content should be treated in priority compared to - * other streams. + * other streams. For STREAM emission, <count> must contains the size of the + * frame payload. This is used for flow control accounting. */ -void qcc_send_stream(struct qcs *qcs, int urg) +void qcc_send_stream(struct qcs *qcs, int urg, int count) { struct qcc *qcc = qcs->qcc; @@ -983,6 +1170,11 @@ void qcc_send_stream(struct qcs *qcs, int urg) LIST_APPEND(&qcs->qcc->send_list, &qcs->el_send); } + if (count) { + qfctl_sinc(&qcc->tx.fc, count); + qfctl_sinc(&qcs->tx.fc, count); + } + TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); } @@ -999,7 +1191,7 @@ void qcc_abort_stream_read(struct qcs *qcs) TRACE_STATE("abort stream read", QMUX_EV_QCS_END, qcc->conn, qcs); qcs->flags |= (QC_SF_TO_STOP_SENDING|QC_SF_READ_ABORTED); - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, 0); tasklet_wakeup(qcc->wait_event.tasklet); end: @@ -1203,17 +1395,19 @@ int qcc_recv(struct qcc *qcc, uint64_t id, uint64_t len, uint64_t offset, */ int qcc_recv_max_data(struct qcc *qcc, uint64_t max) { + int unblock_soft = 0, unblock_real = 0; + TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn); TRACE_PROTO("receiving MAX_DATA", QMUX_EV_QCC_RECV, qcc->conn); - if (qcc->rfctl.md < max) { - qcc->rfctl.md = max; + if 
(qfctl_set_max(&qcc->tx.fc, max, &unblock_soft, &unblock_real)) { TRACE_DATA("increase remote max-data", QMUX_EV_QCC_RECV, qcc->conn); - if (qcc->flags & QC_CF_BLK_MFCTL) { - qcc->flags &= ~QC_CF_BLK_MFCTL; + if (unblock_real) tasklet_wakeup(qcc->wait_event.tasklet); - } + + if (unblock_soft) + qcc_notify_fctl(qcc); } TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn); @@ -1249,16 +1443,18 @@ int qcc_recv_max_stream_data(struct qcc *qcc, uint64_t id, uint64_t max) goto err; if (qcs) { + int unblock_soft = 0, unblock_real = 0; + TRACE_PROTO("receiving MAX_STREAM_DATA", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs); - if (max > qcs->tx.msd) { - qcs->tx.msd = max; + if (qfctl_set_max(&qcs->tx.fc, max, &unblock_soft, &unblock_real)) { TRACE_DATA("increase remote max-stream-data", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs); - - if (qcs->flags & QC_SF_BLK_SFCTL) { - qcs->flags &= ~QC_SF_BLK_SFCTL; + if (unblock_real) { /* TODO optim: only wakeup IO-CB if stream has data to sent. */ tasklet_wakeup(qcc->wait_event.tasklet); } + + if (unblock_soft) + qcs_notify_send(qcs); } } @@ -1410,14 +1606,18 @@ int qcc_recv_stop_sending(struct qcc *qcc, uint64_t id, uint64_t err) } } - /* If FIN already reached, future RESET_STREAMS will be ignored. - * Manually set EOS in this case. - */ + /* Manually set EOS if FIN already reached as futures RESET_STREAM will be ignored in this case. */ if (qcs_sc(qcs) && se_fl_test(qcs->sd, SE_FL_EOI)) { se_fl_set(qcs->sd, SE_FL_EOS); qcs_alert(qcs); } + /* If not defined yet, set abort info for the sedesc */ + if (!qcs->sd->abort_info.info) { + qcs->sd->abort_info.info = (SE_ABRT_SRC_MUX_QUIC << SE_ABRT_SRC_SHIFT); + qcs->sd->abort_info.code = err; + } + /* RFC 9000 3.5. Solicited State Transitions * * An endpoint that receives a STOP_SENDING frame @@ -1500,12 +1700,12 @@ static void qcs_destroy(struct qcs *qcs) TRACE_ENTER(QMUX_EV_QCS_END, conn, qcs); - /* MUST not removed a stream with sending prepared data left. 
This is - * to ensure consistency on connection flow-control calculation. - */ - BUG_ON(qcs->tx.offset < qcs->tx.sent_offset); + if (!(qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL))) { + /* MUST not removed a stream with sending prepared data left. This is + * to ensure consistency on connection flow-control calculation. + */ + BUG_ON(qcs->tx.fc.off_soft != qcs->tx.fc.off_real); - if (!(qcc->flags & QC_CF_ERRL)) { if (quic_stream_is_remote(qcc, id)) qcc_release_remote_stream(qcc, id); } @@ -1515,114 +1715,52 @@ static void qcs_destroy(struct qcs *qcs) TRACE_LEAVE(QMUX_EV_QCS_END, conn); } -/* Transfer as much as possible data on <qcs> from <in> to <out>. This is done - * in respect with available flow-control at stream and connection level. +/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame + * is appended in <frm_list>. Set <fin> if this is supposed to be the last + * stream frame. If <out> is NULL an empty STREAM frame is built : this may be + * useful if FIN needs to be sent without any data left. Frame length will be + * truncated if greater than <fc_conn_wnd>. This allows to prepare several + * frames in a loop while respecting connection flow control window. * - * Returns the total bytes of transferred data or a negative error code. + * Returns the payload length of the STREAM frame or a negative error code. 
*/ -static int qcs_xfer_data(struct qcs *qcs, struct buffer *out, struct buffer *in) +static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, + struct list *frm_list, uint64_t window_conn) { struct qcc *qcc = qcs->qcc; - int left, to_xfer; - int total = 0; + struct quic_frame *frm; + const uint64_t window_stream = qfctl_rcap(&qcs->tx.fc); + const uint64_t bytes = qcs_prep_bytes(qcs); + uint64_t total; TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - if (!qcs_get_buf(qcs, out)) { - TRACE_ERROR("buffer alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - /* - * QCS out buffer diagram - * head left to_xfer - * -------------> ----------> -----> - * -------------------------------------------------- - * |...............|xxxxxxxxxxx|<<<<< - * -------------------------------------------------- - * ^ ack-off ^ sent-off ^ off - * - * STREAM frame - * ^ ^ - * |xxxxxxxxxxxxxxxxx| - */ - - BUG_ON_HOT(qcs->tx.sent_offset < qcs->stream->ack_offset); - BUG_ON_HOT(qcs->tx.offset < qcs->tx.sent_offset); - BUG_ON_HOT(qcc->tx.offsets < qcc->tx.sent_offsets); + /* This must only be called if there is data left, or at least a standalone FIN. 
*/ + BUG_ON((!out || !b_data(out)) && !fin); - left = qcs->tx.offset - qcs->tx.sent_offset; - to_xfer = QUIC_MIN(b_data(in), b_room(out)); + total = bytes; - BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd); - /* do not exceed flow control limit */ - if (qcs->tx.offset + to_xfer > qcs->tx.msd) { + /* do not exceed stream flow control limit */ + if (total > window_stream) { TRACE_DATA("do not exceed stream flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs); - to_xfer = qcs->tx.msd - qcs->tx.offset; + total = window_stream; } - BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md); - /* do not overcome flow control limit on connection */ - if (qcc->tx.offsets + to_xfer > qcc->rfctl.md) { + /* do not exceed connection flow control limit */ + if (total > window_conn) { TRACE_DATA("do not exceed conn flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs); - to_xfer = qcc->rfctl.md - qcc->tx.offsets; + total = window_conn; } - if (!left && !to_xfer) - goto out; - - total = b_force_xfer(out, in, to_xfer); - - out: - { - struct qcs_xfer_data_trace_arg arg = { - .prep = b_data(out), .xfer = total, - }; - TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_XFER_DATA, - qcc->conn, qcs, &arg); - } - - return total; - - err: - TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs); - return -1; -} - -/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame - * is appended in <frm_list>. Set <fin> if this is supposed to be the last - * stream frame. If <out> is NULL an empty STREAM frame is built : this may be - * useful if FIN needs to be sent without any data left. - * - * Returns the payload length of the STREAM frame or a negative error code. - */ -static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, - struct list *frm_list) -{ - struct qcc *qcc = qcs->qcc; - struct quic_frame *frm; - int head, total; - uint64_t base_off; - - TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - - /* if ack_offset < buf_offset, it points to an older buffer. 
*/ - base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset); - BUG_ON(qcs->tx.sent_offset < base_off); - - head = qcs->tx.sent_offset - base_off; - total = out ? b_data(out) - head : 0; - BUG_ON(total < 0); + /* Reset FIN if bytes to send is capped by flow control. */ + if (total < bytes) + fin = 0; if (!total && !fin) { /* No need to send anything if total is NULL and no FIN to signal. */ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); return 0; } - BUG_ON((!total && qcs->tx.sent_offset > qcs->tx.offset) || - (total && qcs->tx.sent_offset >= qcs->tx.offset)); - BUG_ON(qcs->tx.sent_offset + total > qcs->tx.offset); - BUG_ON(qcc->tx.sent_offsets + total > qcc->rfctl.md); TRACE_PROTO("sending STREAM frame", QMUX_EV_QCS_SEND, qcc->conn, qcs); frm = qc_frm_alloc(QUIC_FT_STREAM_8); @@ -1638,7 +1776,7 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, if (total) { frm->stream.buf = out; - frm->stream.data = (unsigned char *)b_peek(out, head); + frm->stream.data = (unsigned char *)b_peek(out, b_data(out) - bytes); } else { /* Empty STREAM frame. */ @@ -1650,9 +1788,9 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, if (fin) frm->type |= QUIC_STREAM_FRAME_TYPE_FIN_BIT; - if (qcs->tx.sent_offset) { + if (qcs->tx.fc.off_real) { frm->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT; - frm->stream.offset.key = qcs->tx.sent_offset; + frm->stream.offset.key = qcs->tx.fc.off_real; } /* Always set length bit as we do not know if there is remaining frames @@ -1680,23 +1818,6 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, return -1; } -/* Check after transferring data from qcs.tx.buf if FIN must be set on the next - * STREAM frame for <qcs>. - * - * Returns true if FIN must be set else false. - */ -static int qcs_stream_fin(struct qcs *qcs) -{ - return qcs->flags & QC_SF_FIN_STREAM && !b_data(&qcs->tx.buf); -} - -/* Return true if <qcs> has data to send in new STREAM frames. 
*/ -static forceinline int qcs_need_sending(struct qcs *qcs) -{ - return b_data(&qcs->tx.buf) || qcs->tx.sent_offset < qcs->tx.offset || - qcs_stream_fin(qcs); -} - /* This function must be called by the upper layer to inform about the sending * of a STREAM frame for <qcs> instance. The frame is of <data> length and on * <offset>. @@ -1708,42 +1829,45 @@ void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset) TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - BUG_ON(offset > qcs->tx.sent_offset); - BUG_ON(offset + data > qcs->tx.offset); + /* Real off MUST always be the greatest offset sent. */ + BUG_ON(offset > qcs->tx.fc.off_real); /* check if the STREAM frame has already been notified. It can happen * for retransmission. */ - if (offset + data < qcs->tx.sent_offset) { + if (offset + data < qcs->tx.fc.off_real) { TRACE_DEVEL("offset already notified", QMUX_EV_QCS_SEND, qcc->conn, qcs); goto out; } qcs_idle_open(qcs); - diff = offset + data - qcs->tx.sent_offset; + diff = offset + data - qcs->tx.fc.off_real; if (diff) { + struct quic_fctl *fc_conn = &qcc->tx.fc; + struct quic_fctl *fc_strm = &qcs->tx.fc; + + /* Ensure real offset never exceeds soft value. 
*/ + BUG_ON(fc_conn->off_real + diff > fc_conn->off_soft); + BUG_ON(fc_strm->off_real + diff > fc_strm->off_soft); + /* increase offset sum on connection */ - qcc->tx.sent_offsets += diff; - BUG_ON_HOT(qcc->tx.sent_offsets > qcc->rfctl.md); - if (qcc->tx.sent_offsets == qcc->rfctl.md) { - qcc->flags |= QC_CF_BLK_MFCTL; - TRACE_STATE("connection flow-control reached", QMUX_EV_QCS_SEND, qcc->conn); + if (qfctl_rinc(fc_conn, diff)) { + TRACE_STATE("connection flow-control reached", + QMUX_EV_QCS_SEND, qcc->conn); } /* increase offset on stream */ - qcs->tx.sent_offset += diff; - BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.msd); - BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.offset); - if (qcs->tx.sent_offset == qcs->tx.msd) { - qcs->flags |= QC_SF_BLK_SFCTL; - TRACE_STATE("stream flow-control reached", QMUX_EV_QCS_SEND, qcc->conn, qcs); + if (qfctl_rinc(fc_strm, diff)) { + TRACE_STATE("stream flow-control reached", + QMUX_EV_QCS_SEND, qcc->conn, qcs); } - - /* If qcs.stream.buf is full, release it to the lower layer. */ - if (qcs->tx.offset == qcs->tx.sent_offset && - b_full(&qcs->stream->buf->buf)) { + /* Release buffer if everything sent and buf is full or stream is waiting for room. */ + if (!qcs_prep_bytes(qcs) && + (b_full(&qcs->stream->buf->buf) || qcs->flags & QC_SF_BLK_MROOM)) { qc_stream_buf_release(qcs->stream); + qcs->flags &= ~QC_SF_BLK_MROOM; + qcs_notify_send(qcs); } /* Add measurement for send rate. This is done at the MUX layer @@ -1752,7 +1876,7 @@ void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset) increment_send_rate(diff, 0); } - if (qcs->tx.offset == qcs->tx.sent_offset && !b_data(&qcs->tx.buf)) { + if (!qc_stream_buf_get(qcs->stream) || !qcs_prep_bytes(qcs)) { /* Remove stream from send_list if all was sent. 
*/ LIST_DEL_INIT(&qcs->el_send); TRACE_STATE("stream sent done", QMUX_EV_QCS_SEND, qcc->conn, qcs); @@ -1842,7 +1966,7 @@ static int qcs_send_reset(struct qcs *qcs) frm->reset_stream.id = qcs->id; frm->reset_stream.app_error_code = qcs->err; - frm->reset_stream.final_size = qcs->tx.sent_offset; + frm->reset_stream.final_size = qcs->tx.fc.off_real; LIST_APPEND(&frms, &frm->list); if (qcc_send_frames(qcs->qcc, &frms)) { @@ -1910,87 +2034,46 @@ static int qcs_send_stop_sending(struct qcs *qcs) return 0; } -/* Used internally by qcc_io_send function. Proceed to send for <qcs>. This will - * transfer data from qcs buffer to its quic_stream counterpart. A STREAM frame - * is then generated and inserted in <frms> list. +/* Used internally by qcc_io_send function. Proceed to send for <qcs>. A STREAM + * frame is generated pointing to QCS stream descriptor content and inserted in + * <frms> list. Frame length will be truncated if greater than <window_conn>. + * This allows to prepare several frames in a loop while respecting connection + * flow control window. * - * Returns the total bytes transferred between qcs and quic_stream buffers. Can - * be null if out buffer cannot be allocated. On error a negative error code is - * used. + * Returns the payload length of the STREAM frame or a negative error code. */ -static int qcs_send(struct qcs *qcs, struct list *frms) +static int qcs_send(struct qcs *qcs, struct list *frms, uint64_t window_conn) { struct qcc *qcc = qcs->qcc; - struct buffer *buf = &qcs->tx.buf; struct buffer *out = qc_stream_buf_get(qcs->stream); - int xfer = 0, buf_avail; - char fin = 0; + int flen = 0; + const char fin = qcs->flags & QC_SF_FIN_STREAM; TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); /* Cannot send STREAM on remote unidirectional streams. */ BUG_ON(quic_stream_is_uni(qcs->id) && quic_stream_is_remote(qcc, qcs->id)); - if (b_data(buf)) { - /* Allocate <out> buffer if not already done. 
*/ - if (!out) { - if (qcc->flags & QC_CF_CONN_FULL) - goto out; - - out = qc_stream_buf_alloc(qcs->stream, qcs->tx.offset, - &buf_avail); - if (!out) { - if (buf_avail) { - TRACE_ERROR("stream desc alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - TRACE_STATE("hitting stream desc buffer limit", QMUX_EV_QCS_SEND, qcc->conn, qcs); - qcc->flags |= QC_CF_CONN_FULL; - goto out; - } - } - - /* Transfer data from <buf> to <out>. */ - xfer = qcs_xfer_data(qcs, out, buf); - if (xfer < 0) - goto err; - - if (xfer > 0) { - qcs_notify_send(qcs); - qcs->flags &= ~QC_SF_BLK_MROOM; - } + /* This function must not be called if there is nothing to send. */ + BUG_ON(!fin && !qcs_prep_bytes(qcs)); - qcs->tx.offset += xfer; - BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd); - qcc->tx.offsets += xfer; - BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md); - - /* out buffer cannot be emptied if qcs offsets differ. */ - BUG_ON(!b_data(out) && qcs->tx.sent_offset != qcs->tx.offset); + /* Skip STREAM frame allocation if already subscribed for send. + * Happens on sendto transient error or network congestion. + */ + if (qcc->wait_event.events & SUB_RETRY_SEND) { + TRACE_DEVEL("already subscribed for sending", + QMUX_EV_QCS_SEND, qcc->conn, qcs); + goto err; } - /* FIN is set if all incoming data were transferred. */ - fin = qcs_stream_fin(qcs); - /* Build a new STREAM frame with <out> buffer. */ - if (qcs->tx.sent_offset != qcs->tx.offset || fin) { - /* Skip STREAM frame allocation if already subscribed for send. - * Happens on sendto transient error or network congestion. 
- */ - if (qcc->wait_event.events & SUB_RETRY_SEND) { - TRACE_DEVEL("already subscribed for sending", - QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - if (qcs_build_stream_frm(qcs, out, fin, frms) < 0) - goto err; - } + flen = qcs_build_stream_frm(qcs, out, fin, frms, window_conn); + if (flen < 0) + goto err; out: TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); - return xfer; + return flen; err: TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs); @@ -2008,7 +2091,8 @@ static int qcc_io_send(struct qcc *qcc) /* Temporary list for QCS on error. */ struct list qcs_failed = LIST_HEAD_INIT(qcs_failed); struct qcs *qcs, *qcs_tmp, *first_qcs = NULL; - int ret, total = 0; + uint64_t window_conn = qfctl_rcap(&qcc->tx.fc); + int ret, total = 0, resent; TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn); @@ -2055,8 +2139,8 @@ static int qcc_io_send(struct qcc *qcc) break; /* Stream must not be present in send_list if it has nothing to send. */ - BUG_ON(!(qcs->flags & (QC_SF_TO_STOP_SENDING|QC_SF_TO_RESET)) && - !qcs_need_sending(qcs)); + BUG_ON(!(qcs->flags & (QC_SF_FIN_STREAM|QC_SF_TO_STOP_SENDING|QC_SF_TO_RESET)) && + (!qcs->stream || !qcs_prep_bytes(qcs))); /* Each STOP_SENDING/RESET_STREAM frame is sent individually to * guarantee its emission. @@ -2070,7 +2154,8 @@ static int qcc_io_send(struct qcc *qcc) /* Remove stream from send_list if it had only STOP_SENDING * to send. */ - if (!(qcs->flags & QC_SF_TO_RESET) && !qcs_need_sending(qcs)) { + if (!(qcs->flags & (QC_SF_FIN_STREAM|QC_SF_TO_RESET)) && + (!qcs->stream || !qcs_prep_bytes(qcs))) { LIST_DEL_INIT(&qcs->el_send); continue; } @@ -2091,9 +2176,12 @@ static int qcc_io_send(struct qcc *qcc) continue; } - if (!(qcc->flags & QC_CF_BLK_MFCTL) && - !(qcs->flags & QC_SF_BLK_SFCTL)) { - if ((ret = qcs_send(qcs, &frms)) < 0) { + /* Total sent bytes must not exceed connection window. 
*/ + BUG_ON(total > window_conn); + + if (!qfctl_rblocked(&qcc->tx.fc) && + !qfctl_rblocked(&qcs->tx.fc) && window_conn > total) { + if ((ret = qcs_send(qcs, &frms, window_conn - total)) < 0) { /* Temporarily remove QCS from send-list. */ LIST_DEL_INIT(&qcs->el_send); LIST_APPEND(&qcs_failed, &qcs->el_send); @@ -2117,7 +2205,10 @@ static int qcc_io_send(struct qcc *qcc) /* Retry sending until no frame to send, data rejected or connection * flow-control limit reached. */ - while (qcc_send_frames(qcc, &frms) == 0 && !(qcc->flags & QC_CF_BLK_MFCTL)) { + while (qcc_send_frames(qcc, &frms) == 0 && !qfctl_rblocked(&qcc->tx.fc)) { + window_conn = qfctl_rcap(&qcc->tx.fc); + resent = 0; + /* Reloop over <qcc.send_list>. Useful for streams which have * fulfilled their qc_stream_desc buf and have now release it. */ @@ -2126,16 +2217,20 @@ static int qcc_io_send(struct qcc *qcc) * new qc_stream_desc should be present in send_list as * long as transport layer can handle all data. */ - BUG_ON(qcs->stream->buf && !(qcs->flags & QC_SF_BLK_SFCTL)); + BUG_ON(qcs->stream->buf && !qfctl_rblocked(&qcs->tx.fc)); + + /* Total sent bytes must not exceed connection window. */ + BUG_ON(resent > window_conn); - if (!(qcs->flags & QC_SF_BLK_SFCTL)) { - if ((ret = qcs_send(qcs, &frms)) < 0) { + if (!qfctl_rblocked(&qcs->tx.fc) && window_conn > resent) { + if ((ret = qcs_send(qcs, &frms, window_conn - resent)) < 0) { LIST_DEL_INIT(&qcs->el_send); LIST_APPEND(&qcs_failed, &qcs->el_send); continue; } total += ret; + resent += ret; } } } @@ -2156,7 +2251,7 @@ static int qcc_io_send(struct qcc *qcc) LIST_APPEND(&qcc->send_list, &qcs->el_send); } - if (!(qcc->flags & QC_CF_BLK_MFCTL)) + if (!qfctl_rblocked(&qcc->tx.fc)) tasklet_wakeup(qcc->wait_event.tasklet); } @@ -2276,7 +2371,7 @@ static void qcc_shutdown(struct qcc *qcc) qcc_io_send(qcc); } else { - qcc->err = quic_err_app(QC_ERR_NO_ERROR); + qcc->err = quic_err_transport(QC_ERR_NO_ERROR); } /* Register "no error" code at transport layer. 
Do not use @@ -2381,9 +2476,7 @@ static int qcc_io_process(struct qcc *qcc) return 0; } -/* release function. This one should be called to free all resources allocated - * to the mux. - */ +/* Free all resources allocated for <qcc> connection. */ static void qcc_release(struct qcc *qcc) { struct connection *conn = qcc->conn; @@ -2391,8 +2484,6 @@ static void qcc_release(struct qcc *qcc) TRACE_ENTER(QMUX_EV_QCC_END, conn); - qcc_shutdown(qcc); - if (qcc->task) { task_destroy(qcc->task); qcc->task = NULL; @@ -2465,6 +2556,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int status) return NULL; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_QCC_WAKE); return NULL; @@ -2507,6 +2599,7 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta */ if (qcc_is_dead(qcc)) { TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn); + qcc_shutdown(qcc); qcc_release(qcc); } @@ -2519,6 +2612,17 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta return t; } +/* Minimal initialization of <qcc> members to use qcc_release() safely. */ +static void _qcc_init(struct qcc *qcc) +{ + qcc->conn = NULL; + qcc->task = NULL; + qcc->wait_event.tasklet = NULL; + qcc->app_ops = NULL; + qcc->streams_by_id = EB_ROOT_UNIQUE; + LIST_INIT(&qcc->lfctl.frms); +} + static int qmux_init(struct connection *conn, struct proxy *prx, struct session *sess, struct buffer *input) { @@ -2530,24 +2634,19 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc = pool_alloc(pool_head_qcc); if (!qcc) { TRACE_ERROR("alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_qcc; + goto err; } - qcc->conn = conn; + _qcc_init(qcc); conn->ctx = qcc; qcc->nb_hreq = qcc->nb_sc = 0; qcc->flags = 0; - - qcc->app_ops = NULL; - - qcc->streams_by_id = EB_ROOT_UNIQUE; + qcc->glitches = 0; + qcc->err = quic_err_transport(QC_ERR_NO_ERROR); /* Server parameters, params used for RX flow control. 
*/ lparams = &conn->handle.qc->rx.params; - qcc->tx.sent_offsets = qcc->tx.offsets = 0; - - LIST_INIT(&qcc->lfctl.frms); qcc->lfctl.ms_bidi = qcc->lfctl.ms_bidi_init = lparams->initial_max_streams_bidi; qcc->lfctl.ms_uni = lparams->initial_max_streams_uni; qcc->lfctl.msd_bidi_l = lparams->initial_max_stream_data_bidi_local; @@ -2559,7 +2658,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->lfctl.offsets_recv = qcc->lfctl.offsets_consume = 0; rparams = &conn->handle.qc->tx.params; - qcc->rfctl.md = rparams->initial_max_data; + qfctl_init(&qcc->tx.fc, rparams->initial_max_data); qcc->rfctl.msd_bidi_l = rparams->initial_max_stream_data_bidi_local; qcc->rfctl.msd_bidi_r = rparams->initial_max_stream_data_bidi_remote; qcc->rfctl.msd_uni_l = rparams->initial_max_stream_data_uni; @@ -2580,10 +2679,12 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->wait_event.tasklet = tasklet_new(); if (!qcc->wait_event.tasklet) { TRACE_ERROR("taslket alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_tasklet; + goto err; } LIST_INIT(&qcc->send_list); + LIST_INIT(&qcc->fctl_list); + LIST_INIT(&qcc->buf_wait_list); qcc->wait_event.tasklet->process = qcc_io_cb; qcc->wait_event.tasklet->context = qcc; @@ -2591,7 +2692,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->proxy = prx; /* haproxy timeouts */ - if (conn_is_back(qcc->conn)) { + if (conn_is_back(conn)) { qcc->timeout = prx->timeout.server; qcc->shut_timeout = tick_isset(prx->timeout.serverfin) ? 
prx->timeout.serverfin : prx->timeout.server; @@ -2608,7 +2709,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->task = task_new_here(); if (!qcc->task) { TRACE_ERROR("timeout task alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_timeout_task; + goto err; } qcc->task->process = qcc_timeout_task; qcc->task->context = qcc; @@ -2619,11 +2720,12 @@ static int qmux_init(struct connection *conn, struct proxy *prx, HA_ATOMIC_STORE(&conn->handle.qc->qcc, qcc); + /* Register conn as app_ops may use it. */ + qcc->conn = conn; + if (qcc_install_app_ops(qcc, conn->handle.qc->app_ops)) { - TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW|QMUX_EV_QCC_ERR, qcc->conn); - /* prepare a CONNECTION_CLOSE frame */ - quic_set_connection_close(conn->handle.qc, quic_err_transport(QC_ERR_APPLICATION_ERROR)); - goto fail_install_app_ops; + TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW|QMUX_EV_QCC_ERR, conn); + goto err; } if (qcc->app_ops == &h3_ops) @@ -2636,19 +2738,24 @@ static int qmux_init(struct connection *conn, struct proxy *prx, /* init read cycle */ tasklet_wakeup(qcc->wait_event.tasklet); - TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn); + TRACE_LEAVE(QMUX_EV_QCC_NEW, conn); return 0; - fail_install_app_ops: - if (qcc->app_ops && qcc->app_ops->release) - qcc->app_ops->release(qcc->ctx); - task_destroy(qcc->task); - fail_no_timeout_task: - tasklet_free(qcc->wait_event.tasklet); - fail_no_tasklet: - pool_free(pool_head_qcc, qcc); - fail_no_qcc: - TRACE_LEAVE(QMUX_EV_QCC_NEW); + err: + /* Prepare CONNECTION_CLOSE, using INTERNAL_ERROR as fallback code if unset. */ + if (!(conn->handle.qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)) { + struct quic_err err = qcc && qcc->err.code ? + qcc->err : quic_err_transport(QC_ERR_INTERNAL_ERROR); + quic_set_connection_close(conn->handle.qc, err); + } + + if (qcc) { + /* In case of MUX init failure, session will ensure connection is freed. 
*/ + qcc->conn = NULL; + qcc_release(qcc); + } + + TRACE_DEVEL("leaving on error", QMUX_EV_QCC_NEW, conn); return -1; } @@ -2704,6 +2811,7 @@ static void qmux_strm_detach(struct sedesc *sd) return; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_STRM_END); return; @@ -2786,11 +2894,18 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags) { struct qcs *qcs = __sc_mux_strm(sc); + const size_t old_data = qcs_prep_bytes(qcs); size_t ret = 0; char fin; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + /* Stream must not be woken up if already waiting for conn buffer. */ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + /* Sending forbidden if QCS is locally closed (FIN or RESET_STREAM sent). */ + BUG_ON(qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)); + /* stream layer has been detached so no transfer must occur after. */ BUG_ON_HOT(qcs->flags & QC_SF_DETACH); @@ -2801,8 +2916,20 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, goto end; } - if (qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)) { - ret = qcs_http_reset_buf(qcs, buf, count); + if (qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_DEVEL("leaving on connection flow control", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + if (!LIST_INLIST(&qcs->el_fctl)) { + TRACE_DEVEL("append to fctl-list", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + LIST_APPEND(&qcs->qcc->fctl_list, &qcs->el_fctl); + } + goto end; + } + + if (qfctl_sblocked(&qcs->tx.fc)) { + TRACE_DEVEL("leaving on flow-control reached", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); goto end; } @@ -2813,7 +2940,9 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, } if (ret || fin) { - qcc_send_stream(qcs, 0); + const size_t data = qcs_prep_bytes(qcs) - old_data; + if (data || fin) + qcc_send_stream(qcs, 0, data); if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND)) tasklet_wakeup(qcs->qcc->wait_event.tasklet); } @@ -2825,18 +2954,25 @@ 
static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, } -static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t qmux_strm_nego_ff(struct stconn *sc, struct buffer *input, + size_t count, unsigned int flags) { struct qcs *qcs = __sc_mux_strm(sc); size_t ret = 0; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + /* Stream must not be woken up if already waiting for conn buffer. */ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + /* Sending forbidden if QCS is locally closed (FIN or RESET_STREAM sent). */ + BUG_ON(qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)); + /* stream layer has been detached so no transfer must occur after. */ BUG_ON_HOT(qcs->flags & QC_SF_DETACH); if (!qcs->qcc->app_ops->nego_ff || !qcs->qcc->app_ops->done_ff) { - /* Fast forwading is not supported by the QUIC application layer */ + /* Fast forwarding is not supported by the QUIC application layer */ qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; goto end; } @@ -2850,6 +2986,22 @@ static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count goto end; } + if (qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_DEVEL("leaving on connection flow control", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + if (!LIST_INLIST(&qcs->el_fctl)) { + TRACE_DEVEL("append to fctl-list", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + LIST_APPEND(&qcs->qcc->fctl_list, &qcs->el_fctl); + } + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + if (qfctl_sblocked(&qcs->tx.fc)) { + TRACE_DEVEL("leaving on flow-control reached", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + /* Alawys disable splicing */ qcs->sd->iobuf.flags |= IOBUF_FL_NO_SPLICING; @@ -2880,36 +3032,37 @@ static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count return ret; } -static size_t qmux_done_ff(struct stconn *sc) +static size_t qmux_strm_done_ff(struct stconn *sc) 
{ struct qcs *qcs = __sc_mux_strm(sc); struct qcc *qcc = qcs->qcc; struct sedesc *sd = qcs->sd; - size_t total = 0; + size_t total = 0, data = sd->iobuf.data; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - if (sd->iobuf.flags & IOBUF_FL_EOI) + if (sd->iobuf.flags & IOBUF_FL_EOI) { + TRACE_STATE("reached stream fin", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); qcs->flags |= QC_SF_FIN_STREAM; + } if (!(qcs->flags & QC_SF_FIN_STREAM) && !sd->iobuf.data) goto end; + data += sd->iobuf.offset; total = qcs->qcc->app_ops->done_ff(qcs); - qcc_send_stream(qcs, 0); + if (data || qcs->flags & QC_SF_FIN_STREAM) + qcc_send_stream(qcs, 0, data); if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND)) tasklet_wakeup(qcc->wait_event.tasklet); end: - if (!b_data(&qcs->tx.buf)) - b_free(&qcs->tx.buf); - TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); return total; } -static int qmux_resume_ff(struct stconn *sc, unsigned int flags) +static int qmux_strm_resume_ff(struct stconn *sc, unsigned int flags) { return 0; } @@ -2962,16 +3115,20 @@ static int qmux_wake(struct connection *conn) return 0; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_QCC_WAKE); return 1; } -static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) +static void qmux_strm_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct qcs *qcs = __sc_mux_strm(sc); struct qcc *qcc = qcs->qcc; + if (!(mode & (SE_SHW_SILENT|SE_SHW_NORMAL))) + return; + TRACE_ENTER(QMUX_EV_STRM_SHUT, qcc->conn, qcs); /* Early closure reported if QC_SF_FIN_STREAM not yet set. 
*/ @@ -2984,7 +3141,7 @@ static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) TRACE_STATE("set FIN STREAM", QMUX_EV_STRM_SHUT, qcc->conn, qcs); qcs->flags |= QC_SF_FIN_STREAM; - qcc_send_stream(qcs, 0); + qcc_send_stream(qcs, 0, 0); } } else { @@ -2999,6 +3156,34 @@ static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) TRACE_LEAVE(QMUX_EV_STRM_SHUT, qcc->conn, qcs); } +static int qmux_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output) +{ + struct qcc *qcc = conn->ctx; + + switch (mux_ctl) { + case MUX_CTL_EXIT_STATUS: + return MUX_ES_UNKNOWN; + + case MUX_CTL_GET_GLITCHES: + return qcc->glitches; + + case MUX_CTL_GET_NBSTRM: { + struct qcs *qcs; + unsigned int nb_strm = qcc->nb_sc; + + list_for_each_entry(qcs, &qcc->opening_list, el_opening) + nb_strm++; + return nb_strm; + } + + case MUX_CTL_GET_MAXSTRM: + return qcc->lfctl.ms_bidi_init; + + default: + return -1; + } +} + static int qmux_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output) { int ret = 0; @@ -3048,19 +3233,41 @@ static const struct mux_ops qmux_ops = { .detach = qmux_strm_detach, .rcv_buf = qmux_strm_rcv_buf, .snd_buf = qmux_strm_snd_buf, - .nego_fastfwd = qmux_nego_ff, - .done_fastfwd = qmux_done_ff, - .resume_fastfwd = qmux_resume_ff, + .nego_fastfwd = qmux_strm_nego_ff, + .done_fastfwd = qmux_strm_done_ff, + .resume_fastfwd = qmux_strm_resume_ff, .subscribe = qmux_strm_subscribe, .unsubscribe = qmux_strm_unsubscribe, .wake = qmux_wake, - .shutw = qmux_strm_shutw, + .shut = qmux_strm_shut, + .ctl = qmux_ctl, .sctl = qmux_sctl, .show_sd = qmux_strm_show_sd, .flags = MX_FL_HTX|MX_FL_NO_UPG|MX_FL_FRAMED, .name = "QUIC", }; +void qcc_show_quic(struct qcc *qcc) +{ + struct eb64_node *node; + chunk_appendf(&trash, " qcc=0x%p flags=0x%x sc=%llu hreq=%llu\n", + qcc, qcc->flags, (ullong)qcc->nb_sc, (ullong)qcc->nb_hreq); + + node = eb64_first(&qcc->streams_by_id); + while (node) { + struct qcs *qcs = eb64_entry(node, struct qcs, by_id); + 
chunk_appendf(&trash, " qcs=0x%p id=%llu flags=0x%x st=%s", + qcs, (ullong)qcs->id, qcs->flags, + qcs_st_to_str(qcs->st)); + if (!quic_stream_is_uni(qcs->id) || !quic_stream_is_local(qcc, qcs->id)) + chunk_appendf(&trash, " rxoff=%llu", (ullong)qcs->rx.offset); + if (!quic_stream_is_uni(qcs->id) || !quic_stream_is_remote(qcc, qcs->id)) + chunk_appendf(&trash, " txoff=%llu", (ullong)qcs->tx.fc.off_real); + chunk_appendf(&trash, "\n"); + node = eb64_next(node); + } +} + static struct mux_proto_list mux_proto_quic = { .token = IST("quic"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_FE, .mux = &qmux_ops }; diff --git a/src/mworker.c b/src/mworker.c index c71446a..c4461cc 100644 --- a/src/mworker.c +++ b/src/mworker.c @@ -20,10 +20,6 @@ #include <sys/wait.h> #include <unistd.h> -#if defined(USE_SYSTEMD) -#include <systemd/sd-daemon.h> -#endif - #include <haproxy/api.h> #include <haproxy/cfgparse.h> #include <haproxy/cli.h> @@ -45,6 +41,9 @@ #include <haproxy/tools.h> #include <haproxy/version.h> +#if defined(USE_SYSTEMD) +#include <haproxy/systemd.h> +#endif static int exitcode = -1; static int max_reloads = -1; /* number max of reloads a worker can have until they are killed */ @@ -565,17 +564,12 @@ void mworker_cleanup_proc() /* Displays workers and processes */ static int cli_io_handler_show_proc(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct mworker_proc *child; int old = 0; int up = date.tv_sec - proc_self->timestamp; char *uptime = NULL; char *reloadtxt = NULL; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - if (up < 0) /* must never be negative because of clock drift */ up = 0; @@ -719,15 +713,10 @@ static int cli_parse_reload(char **args, char *payload, struct appctx *appctx, v static int cli_io_handler_show_loadstatus(struct appctx *appctx) { char *env; - struct stconn *sc = appctx_sc(appctx); if (!cli_has_level(appctx, ACCESS_LVL_OPER)) return 1; - /* FIXME: Don't 
watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - env = getenv("HAPROXY_LOAD_SUCCESS"); if (!env) return 1; @@ -738,7 +727,7 @@ static int cli_io_handler_show_loadstatus(struct appctx *appctx) chunk_printf(&trash, "Success=1\n"); } #ifdef USE_SHM_OPEN - if (startup_logs && b_data(&startup_logs->buf) > 1) + if (startup_logs && ring_data(startup_logs) > 1) chunk_appendf(&trash, "--\n"); if (applet_putchk(appctx, &trash) == -1) diff --git a/src/pattern.c b/src/pattern.c index 52dda5e..f07223f 100644 --- a/src/pattern.c +++ b/src/pattern.c @@ -1547,6 +1547,10 @@ struct pat_ref *pat_ref_lookup(const char *reference) { struct pat_ref *ref; + /* Skip file@ prefix, it is the default case. Can be mixed with ref omitting the prefix */ + if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0) + reference += 5; + list_for_each_entry(ref, &pattern_reference, list) if (ref->reference && strcmp(reference, ref->reference) == 0) return ref; @@ -1834,6 +1838,22 @@ struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned } } + + if (strlen(reference) > 5 && strncmp(reference, "virt@", 5) == 0) + flags |= PAT_REF_ID; + else if (strlen(reference) > 4 && strncmp(reference, "opt@", 4) == 0) { + flags |= (PAT_REF_ID|PAT_REF_FILE); // Will be decided later + reference += 4; + } + else { + /* A file by default */ + flags |= PAT_REF_FILE; + /* Skip file@ prefix to be mixed with ref omitting the prefix */ + if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0) + reference += 5; + } + + ref->reference = strdup(reference); if (!ref->reference) { free(ref->display); @@ -2239,7 +2259,7 @@ struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref * * Return non-zero in case of success, otherwise 0. 
*/ -int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char **err) +int pat_ref_read_from_file_smp(struct pat_ref *ref, char **err) { FILE *file; char *c; @@ -2250,11 +2270,17 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * char *value_beg; char *value_end; - file = fopen(filename, "r"); + file = fopen(ref->reference, "r"); if (!file) { - memprintf(err, "failed to open pattern file <%s>", filename); + if (ref->flags & PAT_REF_ID) { + /* file not found for an optional file, switch it to a virtual list of patterns */ + ref->flags &= ~PAT_REF_FILE; + return 1; + } + memprintf(err, "failed to open pattern file <%s>", ref->reference); return 0; } + ref->flags |= PAT_REF_FILE; /* now parse all patterns. The file may contain only one pattern * followed by one value per line. The start spaces, separator spaces @@ -2310,7 +2336,7 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * if (ferror(file)) { memprintf(err, "error encountered while reading <%s> : %s", - filename, strerror(errno)); + ref->reference, strerror(errno)); goto out_close; } /* success */ @@ -2324,7 +2350,7 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * /* Reads patterns from a file. If <err_msg> is non-NULL, an error message will * be returned there on errors and the caller will have to free it. 
*/ -int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err) +int pat_ref_read_from_file(struct pat_ref *ref, char **err) { FILE *file; char *c; @@ -2332,9 +2358,14 @@ int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err int ret = 0; int line = 0; - file = fopen(filename, "r"); + file = fopen(ref->reference, "r"); if (!file) { - memprintf(err, "failed to open pattern file <%s>", filename); + if (ref->flags & PAT_REF_ID) { + /* file not found for an optional file, switch it to a virtual list of patterns */ + ref->flags &= ~PAT_REF_FILE; + return 1; + } + memprintf(err, "failed to open pattern file <%s>", ref->reference); return 0; } @@ -2365,14 +2396,14 @@ int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err continue; if (!pat_ref_append(ref, arg, NULL, line)) { - memprintf(err, "out of memory when loading patterns from file <%s>", filename); + memprintf(err, "out of memory when loading patterns from file <%s>", ref->reference); goto out_close; } } if (ferror(file)) { memprintf(err, "error encountered while reading <%s> : %s", - filename, strerror(errno)); + ref->reference, strerror(errno)); goto out_close; } ret = 1; /* success */ @@ -2406,14 +2437,16 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, return 0; } - if (load_smp) { - ref->flags |= PAT_REF_SMP; - if (!pat_ref_read_from_file_smp(ref, filename, err)) - return 0; - } - else { - if (!pat_ref_read_from_file(ref, filename, err)) - return 0; + if (ref->flags & PAT_REF_FILE) { + if (load_smp) { + ref->flags |= PAT_REF_SMP; + if (!pat_ref_read_from_file_smp(ref, err)) + return 0; + } + else { + if (!pat_ref_read_from_file(ref, err)) + return 0; + } } } else { @@ -2479,7 +2512,10 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, /* Load reference content in the pattern expression. 
* We need to load elements in the same order they were seen in the - * file as list-based matching types may rely on it. + * file. Indeed, some list-based matching types may rely on it as the + * list is positional, and for tree-based matching, even if the tree is + * content-based in case of duplicated keys we only want the first key + * in the file to be considered. */ list_for_each_entry(elt, &ref->head, list) { if (!pat_ref_push(elt, expr, patflags, err)) { diff --git a/src/peers.c b/src/peers.c index 9ba3d9b..4ec981c 100644 --- a/src/peers.c +++ b/src/peers.c @@ -49,57 +49,12 @@ #include <haproxy/tools.h> #include <haproxy/trace.h> - -/*******************************/ -/* Current peer learning state */ -/*******************************/ - -/******************************/ -/* Current peers section resync state */ -/******************************/ -#define PEERS_F_RESYNC_LOCAL 0x00000001 /* Learn from local finished or no more needed */ -#define PEERS_F_RESYNC_REMOTE 0x00000002 /* Learn from remote finished or no more needed */ -#define PEERS_F_RESYNC_ASSIGN 0x00000004 /* A peer was assigned to learn our lesson */ -#define PEERS_F_RESYNC_PROCESS 0x00000008 /* The assigned peer was requested for resync */ -#define PEERS_F_RESYNC_LOCALTIMEOUT 0x00000010 /* Timeout waiting for a full resync from a local node */ -#define PEERS_F_RESYNC_REMOTETIMEOUT 0x00000020 /* Timeout waiting for a full resync from a remote node */ -#define PEERS_F_RESYNC_LOCALABORT 0x00000040 /* Session aborted learning from a local node */ -#define PEERS_F_RESYNC_REMOTEABORT 0x00000080 /* Session aborted learning from a remote node */ -#define PEERS_F_RESYNC_LOCALFINISHED 0x00000100 /* A local node teach us and was fully up to date */ -#define PEERS_F_RESYNC_REMOTEFINISHED 0x00000200 /* A remote node teach us and was fully up to date */ -#define PEERS_F_RESYNC_LOCALPARTIAL 0x00000400 /* A local node teach us but was partially up to date */ -#define PEERS_F_RESYNC_REMOTEPARTIAL 0x00000800 /* A 
remote node teach us but was partially up to date */ -#define PEERS_F_RESYNC_LOCALASSIGN 0x00001000 /* A local node was assigned for a full resync */ -#define PEERS_F_RESYNC_REMOTEASSIGN 0x00002000 /* A remote node was assigned for a full resync */ -#define PEERS_F_RESYNC_REQUESTED 0x00004000 /* A resync was explicitly requested */ -#define PEERS_F_DONOTSTOP 0x00010000 /* Main table sync task block process during soft stop - to push data to new process */ - -#define PEERS_RESYNC_STATEMASK (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE) -#define PEERS_RESYNC_FROMLOCAL 0x00000000 -#define PEERS_RESYNC_FROMREMOTE PEERS_F_RESYNC_LOCAL -#define PEERS_RESYNC_FINISHED (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE) - /***********************************/ /* Current shared table sync state */ /***********************************/ #define SHTABLE_F_TEACH_STAGE1 0x00000001 /* Teach state 1 complete */ #define SHTABLE_F_TEACH_STAGE2 0x00000002 /* Teach state 2 complete */ -/******************************/ -/* Remote peer teaching state */ -/******************************/ -#define PEER_F_TEACH_PROCESS 0x00000001 /* Teach a lesson to current peer */ -#define PEER_F_TEACH_FINISHED 0x00000008 /* Teach conclude, (wait for confirm) */ -#define PEER_F_TEACH_COMPLETE 0x00000010 /* All that we know already taught to current peer, used only for a local peer */ -#define PEER_F_LEARN_ASSIGN 0x00000100 /* Current peer was assigned for a lesson */ -#define PEER_F_LEARN_NOTUP2DATE 0x00000200 /* Learn from peer finished but peer is not up to date */ -#define PEER_F_ALIVE 0x20000000 /* Used to flag a peer a alive. */ -#define PEER_F_HEARTBEAT 0x40000000 /* Heartbeat message to send. */ -#define PEER_F_DWNGRD 0x80000000 /* When this flag is enabled, we must downgrade the supported version announced during peer sessions. 
*/ - -#define PEER_TEACH_RESET ~(PEER_F_TEACH_PROCESS|PEER_F_TEACH_FINISHED) /* PEER_F_TEACH_COMPLETE should never be reset */ -#define PEER_LEARN_RESET ~(PEER_F_LEARN_ASSIGN|PEER_F_LEARN_NOTUP2DATE) #define PEER_RESYNC_TIMEOUT 5000 /* 5 seconds */ #define PEER_RECONNECT_TIMEOUT 5000 /* 5 seconds */ @@ -334,6 +289,7 @@ static const struct trace_event peers_trace_events[] = { { .mask = PEERS_EV_SESSREL, .name = "sessrl", .desc = "peer session releasing" }, #define PEERS_EV_PROTOERR (1 << 6) { .mask = PEERS_EV_PROTOERR, .name = "protoerr", .desc = "protocol error" }, + { } }; static const struct name_desc peers_trace_lockon_args[4] = { @@ -489,6 +445,38 @@ static const char *statuscode_str(int statuscode) } } +static const char *peer_app_state_str(enum peer_app_state appstate) +{ + switch (appstate) { + case PEER_APP_ST_STOPPED: + return "STOPPED"; + case PEER_APP_ST_STARTING: + return "STARTING"; + case PEER_APP_ST_RUNNING: + return "RUNNING"; + case PEER_APP_ST_STOPPING: + return "STOPPING"; + default: + return "UNKNOWN"; + } +} + +static const char *peer_learn_state_str(enum peer_learn_state learnstate) +{ + switch (learnstate) { + case PEER_LR_ST_NOTASSIGNED: + return "NOTASSIGNED"; + case PEER_LR_ST_ASSIGNED: + return "ASSIGNED"; + case PEER_LR_ST_PROCESSING: + return "PROCESSING"; + case PEER_LR_ST_FINISHED: + return "FINISHED"; + default: + return "UNKNOWN"; + } +} + /* This function encode an uint64 to 'dynamic' length format. The encoded value is written at address *str, and the caller must assure that size after *str is large enough. 
@@ -1059,21 +1047,14 @@ void __peer_session_deinit(struct peer *peer) /* Re-init current table pointers to force announcement on re-connect */ peer->remote_table = peer->last_local_table = peer->stop_local_table = NULL; peer->appctx = NULL; - if (peer->flags & PEER_F_LEARN_ASSIGN) { - /* unassign current peer for learning */ - peer->flags &= ~(PEER_F_LEARN_ASSIGN); - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALABORT; - else - peers->flags |= PEERS_F_RESYNC_REMOTEABORT; - /* reschedule a resync */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000)); - } - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; + /* reset teaching flags to 0 */ + peer->flags &= ~PEER_TEACH_FLAGS; + + /* Mark the peer as stopping and wait for the sync task */ + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + peer->appstate = PEER_APP_ST_STOPPING; + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } @@ -1083,8 +1064,9 @@ static int peer_session_init(struct appctx *appctx) struct stream *s; struct sockaddr_storage *addr = NULL; - if (!sockaddr_alloc(&addr, &peer->addr, sizeof(peer->addr))) + if (!sockaddr_alloc(&addr, &peer->srv->addr, sizeof(peer->srv->addr))) goto out_error; + set_host_port(addr, peer->srv->svc_port); if (appctx_finalize_startup(appctx, peer->peers->peers_fe, &BUF_NULL) == -1) goto out_free_addr; @@ -1393,7 +1375,7 @@ static inline int peer_send_resync_finishedmsg(struct appctx *appctx, .control.head = { PEER_MSG_CLASS_CONTROL, }, }; - p.control.head[1] = (peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ? + p.control.head[1] = (HA_ATOMIC_LOAD(&peers->flags) & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ? 
PEER_MSG_CTRL_RESYNCFINISHED : PEER_MSG_CTRL_RESYNCPARTIAL; TRACE_PROTO("send control message", PEERS_EV_CTRLMSG, @@ -1472,11 +1454,12 @@ static inline int peer_send_error_protomsg(struct appctx *appctx) /* * Function used to lookup for recent stick-table updates associated with - * <st> shared stick-table when a lesson must be taught a peer (PEER_F_LEARN_ASSIGN flag set). + * <st> shared stick-table when a lesson must be taught a peer (learn state is not PEER_LR_ST_NOTASSIGNED). */ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb) { @@ -1496,7 +1479,10 @@ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_ta return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1506,6 +1492,7 @@ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_ta static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb) { @@ -1516,7 +1503,10 @@ static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_tab return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1526,6 +1516,7 @@ static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_tab static inline struct stksess *peer_teach_stage2_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb || eb->key > st->teaching_origin) { @@ -1533,7 +1524,10 @@ static inline struct 
stksess *peer_teach_stage2_stksess_lookup(struct shared_tab return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1621,10 +1615,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, updates_sent++; if (updates_sent >= peers_max_updates_at_once) { - /* pretend we're full so that we get back ASAP */ - struct stconn *sc = appctx_sc(appctx); - - sc_need_room(sc, 0); + applet_have_more_data(appctx); ret = -1; break; } @@ -1637,7 +1628,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, /* * Function to emit update messages for <st> stick-table when a lesson must - * be taught to the peer <p> (PEER_F_LEARN_ASSIGN flag set). + * be taught to the peer <p> (learn state is not PEER_LR_ST_NOTASSIGNED). * * Note that <st> shared stick-table is locked when calling this function, and * the lock is dropped then re-acquired. 
@@ -1650,13 +1641,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, static inline int peer_send_teach_process_msgs(struct appctx *appctx, struct peer *p, struct shared_table *st) { - int ret; - - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - ret = peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - - return ret; + return peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st); } /* @@ -2487,73 +2472,27 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee } /* reset teaching flags to 0 */ - peer->flags &= PEER_TEACH_RESET; + peer->flags &= ~PEER_TEACH_FLAGS; /* flag to start to teach lesson */ - peer->flags |= PEER_F_TEACH_PROCESS; - peers->flags |= PEERS_F_RESYNC_REQUESTED; + peer->flags |= (PEER_F_TEACH_PROCESS|PEER_F_DBG_RESYNC_REQUESTED); } else if (msg_head[1] == PEER_MSG_CTRL_RESYNCFINISHED) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, NULL, &msg_head[1], peers->local->id, peer->id); - if (peer->flags & PEER_F_LEARN_ASSIGN) { - int commit_a_finish = 1; - - peer->flags &= ~PEER_F_LEARN_ASSIGN; - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - if (peer->srv->shard) { - struct peer *ps; - - peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL; - peer->flags |= PEER_F_LEARN_NOTUP2DATE; - for (ps = peers->remote; ps; ps = ps->next) { - if (ps->srv->shard == peer->srv->shard) { - /* flag all peers from same shard - * notup2date to disable request - * of a resync frm them - */ - ps->flags |= PEER_F_LEARN_NOTUP2DATE; - } - else if (ps->srv->shard && !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) { - /* it remains some other shards not requested - * we don't commit a resync finish to request - * the other shards - */ - commit_a_finish = 0; - } - } - - if (!commit_a_finish) { - /* it remains some shard to request, we schedule a new request - */ - peers->resync_timeout = 
tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); - task_wakeup(peers->sync_task, TASK_WOKEN_MSG); - } - } - - if (commit_a_finish) { - peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE); - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALFINISHED; - else - peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED; - } + if (peer->learnstate == PEER_LR_ST_PROCESSING) { + peer->learnstate = PEER_LR_ST_FINISHED; + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } peer->confirm++; } else if (msg_head[1] == PEER_MSG_CTRL_RESYNCPARTIAL) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, NULL, &msg_head[1], peers->local->id, peer->id); - if (peer->flags & PEER_F_LEARN_ASSIGN) { - peer->flags &= ~PEER_F_LEARN_ASSIGN; - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALPARTIAL; - else - peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL; - peer->flags |= PEER_F_LEARN_NOTUP2DATE; - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + if (peer->learnstate == PEER_LR_ST_PROCESSING) { + peer->learnstate = PEER_LR_ST_FINISHED; + peer->flags |= (PEER_F_LEARN_NOTUP2DATE|PEER_F_WAIT_SYNCTASK_ACK); task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } peer->confirm++; @@ -2566,7 +2505,7 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee /* If stopping state */ if (stopping) { /* Close session, push resync no more needed */ - peer->flags |= PEER_F_TEACH_COMPLETE; + peer->flags |= PEER_F_LOCAL_TEACH_COMPLETE; appctx->st0 = PEER_SESS_ST_END; return 0; } @@ -2576,7 +2515,7 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee } /* reset teaching flags to 0 */ - peer->flags &= PEER_TEACH_RESET; + peer->flags &= ~PEER_TEACH_FLAGS; } else if (msg_head[1] == PEER_MSG_CTRL_HEARTBEAT) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, @@ -2650,16 +2589,13 @@ static inline int 
peer_send_msgs(struct appctx *appctx, { int repl; - /* Need to request a resync */ - if ((peer->flags & PEER_F_LEARN_ASSIGN) && - (peers->flags & PEERS_F_RESYNC_ASSIGN) && - !(peers->flags & PEERS_F_RESYNC_PROCESS)) { - + /* Need to request a resync (only possible for a remote peer at this stage) */ + if (peer->learnstate == PEER_LR_ST_ASSIGNED) { + BUG_ON(peer->local); repl = peer_send_resync_reqmsg(appctx, peer, peers); if (repl <= 0) return repl; - - peers->flags |= PEERS_F_RESYNC_PROCESS; + peer->learnstate = PEER_LR_ST_PROCESSING; } /* Nothing to read, now we start to write */ @@ -2688,18 +2624,19 @@ static inline int peer_send_msgs(struct appctx *appctx, } if (!(peer->flags & PEER_F_TEACH_PROCESS)) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - if (!(peer->flags & PEER_F_LEARN_ASSIGN) && - (st->last_pushed != st->table->localupdate)) { + int must_send; + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &st->table->updt_lock); + must_send = (peer->learnstate == PEER_LR_ST_NOTASSIGNED) && (st->last_pushed != st->table->localupdate); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); + + if (must_send) { repl = peer_send_teach_process_msgs(appctx, peer, st); if (repl <= 0) { - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); peer->stop_local_table = peer->last_local_table; return repl; } } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); } else if (!(peer->flags & PEER_F_TEACH_FINISHED)) { if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) { @@ -2733,10 +2670,7 @@ static inline int peer_send_msgs(struct appctx *appctx, updates++; if (updates >= peers_max_updates_at_once) { - /* pretend we're full so that we get back ASAP */ - struct stconn *sc = appctx_sc(appctx); - - sc_need_room(sc, 0); + applet_have_more_data(appctx); return -1; } @@ -2872,88 +2806,16 @@ static inline int peer_getline_last(struct appctx *appctx, struct peer **curpeer } /* - * Init <peer> peer after having accepted it at peer protocol level. 
- */ -static inline void init_accepted_peer(struct peer *peer, struct peers *peers) -{ - struct shared_table *st; - - peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - /* Register status code */ - peer->statuscode = PEER_SESS_SC_SUCCESSCODE; - peer->last_hdshk = now_ms; - - /* Awake main task */ - task_wakeup(peers->sync_task, TASK_WOKEN_MSG); - - /* Init confirm counter */ - peer->confirm = 0; - - /* Init cursors */ - for (st = peer->tables; st ; st = st->next) { - uint commitid, updateid; - - st->last_get = st->last_acked = 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - /* if st->update appears to be in future it means - * that the last acked value is very old and we - * remain unconnected a too long time to use this - * acknowledgement as a reset. - * We should update the protocol to be able to - * signal the remote peer that it needs a full resync. - * Here a partial fix consist to set st->update at - * the max past value - */ - if ((int)(st->table->localupdate - st->update) < 0) - st->update = st->table->localupdate + (2147483648U); - st->teaching_origin = st->last_pushed = st->update; - st->flags = 0; - - updateid = st->last_pushed; - commitid = _HA_ATOMIC_LOAD(&st->table->commitupdate); - - while ((int)(updateid - commitid) > 0) { - if (_HA_ATOMIC_CAS(&st->table->commitupdate, &commitid, updateid)) - break; - __ha_cpu_relax(); - } - - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - } - - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; - - /* if current peer is local */ - if (peer->local) { - /* if current host need resyncfrom local and no process assigned */ - if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* assign local peer for a lesson, consider lesson already requested */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= 
(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - peers->flags |= PEERS_F_RESYNC_LOCALASSIGN; - } - - } - else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* assign peer for a lesson */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; - } -} - -/* - * Init <peer> peer after having connected it at peer protocol level. + * Init <peer> peer after validating a connection at peer protocol level. It may + * a incoming or outgoing connection. The peer init must be acknowledge by the + * sync task. Message processing is blocked in the meanwhile. */ static inline void init_connected_peer(struct peer *peer, struct peers *peers) { struct shared_table *st; peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + /* Init cursors */ for (st = peer->tables; st ; st = st->next) { uint updateid, commitid; @@ -2986,28 +2848,25 @@ static inline void init_connected_peer(struct peer *peer, struct peers *peers) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); } + /* Awake main task to ack the new peer state */ + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); + /* Init confirm counter */ peer->confirm = 0; - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; + /* reset teaching flags to 0 */ + peer->flags &= ~PEER_TEACH_FLAGS; - /* If current peer is local */ - if (peer->local) { - /* flag to start to teach lesson */ - peer->flags |= PEER_F_TEACH_PROCESS; - } - else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* If peer is remote and resync from remote is needed, - and no peer currently assigned */ - - /* assign peer for a lesson */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; + if 
(peer->local && !(appctx_is_back(peer->appctx))) { + /* If the local peer has established the connection (appctx is + * on the frontend side), flag it to start to teach lesson. + */ + peer->flags |= PEER_F_TEACH_PROCESS; } + + /* Mark the peer as starting and wait the sync task */ + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + peer->appstate = PEER_APP_ST_STARTING; } /* @@ -3024,7 +2883,7 @@ static void peer_io_handler(struct appctx *appctx) unsigned int maj_ver, min_ver; int prev_state; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -3091,6 +2950,7 @@ switchstate: */ curpeer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); peer_session_forceshutdown(curpeer); + curpeer->heartbeat = TICK_ETERNITY; curpeer->coll++; } @@ -3127,7 +2987,11 @@ switchstate: goto switchstate; } - init_accepted_peer(curpeer, curpeers); + /* Register status code */ + curpeer->statuscode = PEER_SESS_SC_SUCCESSCODE; + curpeer->last_hdshk = now_ms; + + init_connected_peer(curpeer, curpeers); /* switch to waiting message state */ _HA_ATOMIC_INC(&connected_peers); @@ -3216,6 +3080,13 @@ switchstate: } } + if (curpeer->flags & PEER_F_WAIT_SYNCTASK_ACK) + goto out; + + /* local peer is assigned of a lesson, start it */ + if (curpeer->learnstate == PEER_LR_ST_ASSIGNED && curpeer->local) + curpeer->learnstate = PEER_LR_ST_PROCESSING; + reql = peer_recv_msg(appctx, (char *)msg_head, sizeof msg_head, &msg_len, &totl); if (reql <= 0) { if (reql == -1) @@ -3348,7 +3219,7 @@ static void peer_session_forceshutdown(struct peer *peer) /* Pre-configures a peers frontend to accept incoming connections */ void peers_setup_frontend(struct proxy *fe) { - fe->last_change = ns_to_sec(now_ns); + fe->fe_counters.last_change = ns_to_sec(now_ns); fe->cap = PR_CAP_FE | PR_CAP_BE; fe->mode = PR_MODE_PEERS; fe->maxconn = 0; @@ -3394,274 
+3265,432 @@ static struct appctx *peer_session_create(struct peers *peers, struct peer *peer return NULL; } -/* - * Task processing function to manage re-connect, peer session - * tasks wakeup on local update and heartbeat. Let's keep it exported so that it - * resolves in stack traces and "show tasks". +/* Clear LEARN flags to a given peer, dealing with aborts if it was assigned for + * learning. In this case, the resync timeout is re-armed. */ -struct task *process_peer_sync(struct task * task, void *context, unsigned int state) +static void clear_peer_learning_status(struct peer *peer) { - struct peers *peers = context; - struct peer *ps; - struct shared_table *st; + if (peer->learnstate != PEER_LR_ST_NOTASSIGNED) { + struct peers *peers = peer->peers; - task->expire = TICK_ETERNITY; + /* unassign current peer for learning */ + HA_ATOMIC_AND(&peers->flags, ~PEERS_F_RESYNC_ASSIGN); + HA_ATOMIC_OR(&peers->flags, (peer->local ? PEERS_F_DBG_RESYNC_LOCALABORT : PEERS_F_DBG_RESYNC_REMOTEABORT)); - /* Acquire lock for all peers of the section */ - for (ps = peers->remote; ps; ps = ps->next) - HA_SPIN_LOCK(PEER_LOCK, &ps->lock); + /* reschedule a resync */ + peer->peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000)); + peer->learnstate = PEER_LR_ST_NOTASSIGNED; + } + peer->flags &= ~PEER_F_LEARN_NOTUP2DATE; +} - if (!stopping) { - /* Normal case (not soft stop)*/ +static void sync_peer_learn_state(struct peers *peers, struct peer *peer) +{ + unsigned int flags = 0; - /* resync timeout set to TICK_ETERNITY means we just start - * a new process and timer was not initialized. - * We must arm this timer to switch to a request to a remote - * node if incoming connection from old local process never - * comes. 
- */ - if (peers->resync_timeout == TICK_ETERNITY) - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + if (peer->learnstate != PEER_LR_ST_FINISHED) + return; + + /* The learning process is now finished */ + if (peer->flags & PEER_F_LEARN_NOTUP2DATE) { + /* Partial resync */ + flags |= (peer->local ? PEERS_F_DBG_RESYNC_LOCALPARTIAL : PEERS_F_DBG_RESYNC_REMOTEPARTIAL); + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + else { + /* Full resync */ + struct peer *rem_peer; + int commit_a_finish = 1; + + if (peer->srv->shard) { + flags |= PEERS_F_DBG_RESYNC_REMOTEPARTIAL; + peer->flags |= PEER_F_LEARN_NOTUP2DATE; + for (rem_peer = peers->remote; rem_peer; rem_peer = rem_peer->next) { + if (rem_peer->srv->shard && rem_peer != peer) { + HA_SPIN_LOCK(PEER_LOCK, &rem_peer->lock); + if (rem_peer->srv->shard == peer->srv->shard) { + /* flag all peers from same shard + * notup2date to disable request + * of a resync frm them + */ + rem_peer->flags |= PEER_F_LEARN_NOTUP2DATE; + } + else if (!(rem_peer->flags & PEER_F_LEARN_NOTUP2DATE)) { + /* it remains some other shards not requested + * we don't commit a resync finish to request + * the other shards + */ + commit_a_finish = 0; + } + HA_SPIN_UNLOCK(PEER_LOCK, &rem_peer->lock); + } + } - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) && - (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* Resync from local peer needed - no peer was assigned for the lesson - and no old local peer found - or resync timeout expire */ + if (!commit_a_finish) { + /* it remains some shard to request, we schedule a new request */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + } - /* flag no more resync from local, to try resync from remotes */ - peers->flags |= PEERS_F_RESYNC_LOCAL; - peers->flags |= PEERS_F_RESYNC_LOCALTIMEOUT; + if (commit_a_finish) { + flags |= 
(PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_RESYNC_REMOTE_FINISHED); + flags |= (peer->local ? PEERS_F_DBG_RESYNC_LOCALFINISHED : PEERS_F_DBG_RESYNC_REMOTEFINISHED); + } + } + peer->learnstate = PEER_LR_ST_NOTASSIGNED; + HA_ATOMIC_AND(&peers->flags, ~PEERS_F_RESYNC_ASSIGN); + HA_ATOMIC_OR(&peers->flags, flags); - /* reschedule a resync */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + appctx_wakeup(peer->appctx); +} + +/* Synchronise the peer applet state with its associated peers section. This + * function handles STARTING->RUNNING and STOPPING->STOPPED transitions. + */ +static void sync_peer_app_state(struct peers *peers, struct peer *peer) +{ + if (peer->appstate == PEER_APP_ST_STOPPING) { + clear_peer_learning_status(peer); + peer->appstate = PEER_APP_ST_STOPPED; + } + else if (peer->appstate == PEER_APP_ST_STARTING) { + clear_peer_learning_status(peer); + if (peer->local & appctx_is_back(peer->appctx)) { + /* if local peer has accepted the connection (appctx is + * on the backend side), flag it to learn a lesson and + * be sure it will start immediately. This only happens + * if no resync is in progress and if the lacal resync + * was not already performed. + */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* assign local peer for a lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_LOCALASSIGN); + } + } + else if (!peer->local) { + /* If a connection was validated for a remote peer, flag + * it to learn a lesson but don't start it yet. The peer + * must request it explicitly. This only happens if no + * resync is in progress and if the remote resync was + * not already performed. 
+ */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* assign remote peer for a lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_REMOTEASSIGN); + } } + peer->appstate = PEER_APP_ST_RUNNING; + appctx_wakeup(peer->appctx); + } +} - /* For each session */ - for (ps = peers->remote; ps; ps = ps->next) { - /* For each remote peers */ - if (!ps->local) { - if (!ps->appctx) { - /* no active peer connection */ - if (ps->statuscode == 0 || - ((ps->statuscode == PEER_SESS_SC_CONNECTCODE || - ps->statuscode == PEER_SESS_SC_SUCCESSCODE || - ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) && - tick_is_expired(ps->reconnect, now_ms))) { - /* connection never tried - * or previous peer connection established with success - * or previous peer connection failed while connecting - * and reconnection timer is expired */ - - /* retry a connect */ - ps->appctx = peer_session_create(peers, ps); - } - else if (!tick_is_expired(ps->reconnect, now_ms)) { - /* If previous session failed during connection - * but reconnection timer is not expired */ +/* Process the sync task for a running process. 
It is called from process_peer_sync() only */ +static void __process_running_peer_sync(struct task *task, struct peers *peers, unsigned int state) +{ + struct peer *peer; + struct shared_table *st; - /* reschedule task for reconnect */ - task->expire = tick_first(task->expire, ps->reconnect); - } - /* else do nothing */ - } /* !ps->appctx */ - else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE) { - /* current peer connection is active and established */ - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN) && - !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) { - /* Resync from a remote is needed - * and no peer was assigned for lesson - * and current peer may be up2date */ - - /* assign peer for the lesson */ - ps->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; - - /* wake up peer handler to handle a request of resync */ - appctx_wakeup(ps->appctx); + /* resync timeout set to TICK_ETERNITY means we just start + * a new process and timer was not initialized. + * We must arm this timer to switch to a request to a remote + * node if incoming connection from old local process never + * comes. 
+ */ + if (peers->resync_timeout == TICK_ETERNITY) + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) && + (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* Resync from local peer needed + no peer was assigned for the lesson + and no old local peer found + or resync timeout expire */ + + /* flag no more resync from local, to try resync from remotes */ + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_DBG_RESYNC_LOCALTIMEOUT); + + /* reschedule a resync */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + + /* For each session */ + for (peer = peers->remote; peer; peer = peer->next) { + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + + sync_peer_learn_state(peers, peer); + sync_peer_app_state(peers, peer); + + /* Peer changes, if any, were now ack by the sync task. Unblock + * the peer (any wakeup should already be performed, no need to + * do it here) + */ + peer->flags &= ~PEER_F_WAIT_SYNCTASK_ACK; + + /* For each remote peers */ + if (!peer->local) { + if (!peer->appctx) { + /* no active peer connection */ + if (peer->statuscode == 0 || + ((peer->statuscode == PEER_SESS_SC_CONNECTCODE || + peer->statuscode == PEER_SESS_SC_SUCCESSCODE || + peer->statuscode == PEER_SESS_SC_CONNECTEDCODE) && + tick_is_expired(peer->reconnect, now_ms))) { + /* connection never tried + * or previous peer connection established with success + * or previous peer connection failed while connecting + * and reconnection timer is expired */ + + /* retry a connect */ + peer->appctx = peer_session_create(peers, peer); + } + else if (!tick_is_expired(peer->reconnect, now_ms)) { + /* If previous session failed during connection + * but reconnection timer is not expired */ + + /* reschedule task for reconnect */ + task->expire = tick_first(task->expire, peer->reconnect); + } + /* else 
do nothing */ + } /* !peer->appctx */ + else if (peer->statuscode == PEER_SESS_SC_SUCCESSCODE) { + /* current peer connection is active and established */ + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN) && + !(peer->flags & PEER_F_LEARN_NOTUP2DATE)) { + /* Resync from a remote is needed + * and no peer was assigned for lesson + * and current peer may be up2date */ + + /* assign peer for the lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_REMOTEASSIGN); + + /* wake up peer handler to handle a request of resync */ + appctx_wakeup(peer->appctx); + } + else { + int update_to_push = 0; + + /* Awake session if there is data to push */ + for (st = peer->tables; st ; st = st->next) { + if (st->last_pushed != st->table->localupdate) { + /* wake up the peer handler to push local updates */ + update_to_push = 1; + /* There is no need to send a heartbeat message + * when some updates must be pushed. The remote + * peer will consider <peer> peer as alive when it will + * receive these updates. + */ + peer->flags &= ~PEER_F_HEARTBEAT; + /* Re-schedule another one later. */ + peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + /* Refresh reconnect if necessary */ + if (tick_is_expired(peer->reconnect, now_ms)) + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); + /* We are going to send updates, let's ensure we will + * come back to send heartbeat messages or to reconnect. 
+ */ + task->expire = tick_first(peer->reconnect, peer->heartbeat); + appctx_wakeup(peer->appctx); + break; + } } - else { - int update_to_push = 0; - - /* Awake session if there is data to push */ - for (st = ps->tables; st ; st = st->next) { - if (st->last_pushed != st->table->localupdate) { - /* wake up the peer handler to push local updates */ - update_to_push = 1; - /* There is no need to send a heartbeat message - * when some updates must be pushed. The remote - * peer will consider <ps> peer as alive when it will - * receive these updates. - */ - ps->flags &= ~PEER_F_HEARTBEAT; - /* Re-schedule another one later. */ - ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - /* Refresh reconnect if necessary */ - if (tick_is_expired(ps->reconnect, now_ms)) - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); - /* We are going to send updates, let's ensure we will - * come back to send heartbeat messages or to reconnect. + /* When there are updates to send we do not reconnect + * and do not send heartbeat message either. + */ + if (!update_to_push) { + if (tick_is_expired(peer->reconnect, now_ms)) { + if (peer->flags & PEER_F_ALIVE) { + /* This peer was alive during a 'reconnect' period. + * Flag it as not alive again for the next period. */ - task->expire = tick_first(ps->reconnect, ps->heartbeat); - appctx_wakeup(ps->appctx); - break; - } - } - /* When there are updates to send we do not reconnect - * and do not send heartbeat message either. - */ - if (!update_to_push) { - if (tick_is_expired(ps->reconnect, now_ms)) { - if (ps->flags & PEER_F_ALIVE) { - /* This peer was alive during a 'reconnect' period. - * Flag it as not alive again for the next period. 
- */ - ps->flags &= ~PEER_F_ALIVE; - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); - } - else { - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); - ps->heartbeat = TICK_ETERNITY; - peer_session_forceshutdown(ps); - ps->no_hbt++; - } + peer->flags &= ~PEER_F_ALIVE; + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); } - else if (tick_is_expired(ps->heartbeat, now_ms)) { - ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - ps->flags |= PEER_F_HEARTBEAT; - appctx_wakeup(ps->appctx); + else { + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); + peer->heartbeat = TICK_ETERNITY; + peer_session_forceshutdown(peer); + sync_peer_app_state(peers, peer); + peer->no_hbt++; } - task->expire = tick_first(ps->reconnect, ps->heartbeat); } - } - /* else do nothing */ - } /* SUCCESSCODE */ - } /* !ps->peer->local */ - } /* for */ - - /* Resync from remotes expired: consider resync is finished */ - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN) && - tick_is_expired(peers->resync_timeout, now_ms)) { - /* Resync from remote peer needed - * no peer was assigned for the lesson - * and resync timeout expire */ - - /* flag no more resync from remote, consider resync is finished */ - peers->flags |= PEERS_F_RESYNC_REMOTE; - peers->flags |= PEERS_F_RESYNC_REMOTETIMEOUT; - } - - if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) { - /* Resync not finished*/ - /* reschedule task to resync timeout if not expired, to ended resync if needed */ - if (!tick_is_expired(peers->resync_timeout, now_ms)) - task->expire = tick_first(task->expire, peers->resync_timeout); - } - } /* !stopping */ - else { - /* soft stop case */ - if (state & TASK_WOKEN_SIGNAL) { - /* We've just received the signal */ - if (!(peers->flags & PEERS_F_DONOTSTOP)) { - /* add DO NOT STOP flag if not present */ - 
_HA_ATOMIC_INC(&jobs); - peers->flags |= PEERS_F_DONOTSTOP; - - /* disconnect all connected peers to process a local sync - * this must be done only the first time we are switching - * in stopping state - */ - for (ps = peers->remote; ps; ps = ps->next) { - /* we're killing a connection, we must apply a random delay before - * retrying otherwise the other end will do the same and we can loop - * for a while. - */ - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); - if (ps->appctx) { - peer_session_forceshutdown(ps); + else if (tick_is_expired(peer->heartbeat, now_ms)) { + peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + peer->flags |= PEER_F_HEARTBEAT; + appctx_wakeup(peer->appctx); + } + task->expire = tick_first(peer->reconnect, peer->heartbeat); } } + /* else do nothing */ + } /* SUCCESSCODE */ + } /* !peer->peer->local */ - /* Set resync timeout for the local peer and request a immediate reconnect */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); - peers->local->reconnect = now_ms; + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); + } /* for */ + + /* Resync from remotes expired or no remote peer: consider resync is finished */ + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN) && + (tick_is_expired(peers->resync_timeout, now_ms) || !peers->remote->next)) { + /* Resync from remote peer needed + * no peer was assigned for the lesson + * and resync timeout expire */ + + /* flag no more resync from remote, consider resync is finished */ + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_REMOTE_FINISHED|PEERS_F_DBG_RESYNC_REMOTETIMEOUT); + } + + if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) { + /* Resync not finished*/ + /* reschedule task to resync timeout if not expired, to ended resync if needed */ + if (!tick_is_expired(peers->resync_timeout, now_ms)) + task->expire = tick_first(task->expire, 
peers->resync_timeout); + } +} + +/* Process the sync task for a stopping process. It is called from process_peer_sync() only */ +static void __process_stopping_peer_sync(struct task *task, struct peers *peers, unsigned int state) +{ + struct peer *peer; + struct shared_table *st; + static int dont_stop = 0; + + /* For each peer */ + for (peer = peers->remote; peer; peer = peer->next) { + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + + sync_peer_learn_state(peers, peer); + sync_peer_app_state(peers, peer); + + /* Peer changes, if any, were now ack by the sync task. Unblock + * the peer (any wakeup should already be performed, no need to + * do it here) + */ + peer->flags &= ~PEER_F_WAIT_SYNCTASK_ACK; + + if ((state & TASK_WOKEN_SIGNAL) && !dont_stop) { + /* we're killing a connection, we must apply a random delay before + * retrying otherwise the other end will do the same and we can loop + * for a while. + */ + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); + if (peer->appctx) { + peer_session_forceshutdown(peer); + sync_peer_app_state(peers, peer); } } - ps = peers->local; - if (ps->flags & PEER_F_TEACH_COMPLETE) { - if (peers->flags & PEERS_F_DONOTSTOP) { - /* resync of new process was complete, current process can die now */ - _HA_ATOMIC_DEC(&jobs); - peers->flags &= ~PEERS_F_DONOTSTOP; - for (st = ps->tables; st ; st = st->next) - HA_ATOMIC_DEC(&st->table->refcnt); - } + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); + } + + /* We've just received the signal */ + if (state & TASK_WOKEN_SIGNAL) { + if (!dont_stop) { + /* add DO NOT STOP flag if not present */ + _HA_ATOMIC_INC(&jobs); + dont_stop = 1; + + /* Set resync timeout for the local peer and request a immediate reconnect */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + peers->local->reconnect = now_ms; } - else if (!ps->appctx) { - /* Re-arm resync timeout if necessary */ - if (!tick_isset(peers->resync_timeout)) - peers->resync_timeout = tick_add(now_ms, 
MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } - /* If there's no active peer connection */ - if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED && - !tick_is_expired(peers->resync_timeout, now_ms) && - (ps->statuscode == 0 || - ps->statuscode == PEER_SESS_SC_SUCCESSCODE || - ps->statuscode == PEER_SESS_SC_CONNECTEDCODE || - ps->statuscode == PEER_SESS_SC_TRYAGAIN)) { - /* The resync is finished for the local peer and - * the resync timeout is not expired and - * connection never tried - * or previous peer connection was successfully established - * or previous tcp connect succeeded but init state incomplete - * or during previous connect, peer replies a try again statuscode */ - - if (!tick_is_expired(ps->reconnect, now_ms)) { - /* reconnection timer is not expired. reschedule task for reconnect */ - task->expire = tick_first(task->expire, ps->reconnect); - } - else { - /* connect to the local peer if we must push a local sync */ - if (peers->flags & PEERS_F_DONOTSTOP) { - peer_session_create(peers, ps); - } - } + peer = peers->local; + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + if (peer->flags & PEER_F_LOCAL_TEACH_COMPLETE) { + if (dont_stop) { + /* resync of new process was complete, current process can die now */ + _HA_ATOMIC_DEC(&jobs); + dont_stop = 0; + for (st = peer->tables; st ; st = st->next) + HA_ATOMIC_DEC(&st->table->refcnt); + } + } + else if (!peer->appctx) { + /* Re-arm resync timeout if necessary */ + if (!tick_isset(peers->resync_timeout)) + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + + /* If there's no active peer connection */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED && + !tick_is_expired(peers->resync_timeout, now_ms) && + (peer->statuscode == 0 || + peer->statuscode == PEER_SESS_SC_SUCCESSCODE || + peer->statuscode == PEER_SESS_SC_CONNECTEDCODE || + peer->statuscode == PEER_SESS_SC_TRYAGAIN)) { + /* The resync is finished for the local peer and + * the resync timeout is 
not expired and + * connection never tried + * or previous peer connection was successfully established + * or previous tcp connect succeeded but init state incomplete + * or during previous connect, peer replies a try again statuscode */ + + if (!tick_is_expired(peer->reconnect, now_ms)) { + /* reconnection timer is not expired. reschedule task for reconnect */ + task->expire = tick_first(task->expire, peer->reconnect); } - else { - /* Other error cases */ - if (peers->flags & PEERS_F_DONOTSTOP) { - /* unable to resync new process, current process can die now */ - _HA_ATOMIC_DEC(&jobs); - peers->flags &= ~PEERS_F_DONOTSTOP; - for (st = ps->tables; st ; st = st->next) - HA_ATOMIC_DEC(&st->table->refcnt); + else { + /* connect to the local peer if we must push a local sync */ + if (dont_stop) { + peer_session_create(peers, peer); } } } - else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE ) { - /* Reset resync timeout during a resync */ - peers->resync_timeout = TICK_ETERNITY; - - /* current peer connection is active and established - * wake up all peer handlers to push remaining local updates */ - for (st = ps->tables; st ; st = st->next) { - if (st->last_pushed != st->table->localupdate) { - appctx_wakeup(ps->appctx); - break; - } + else { + /* Other error cases */ + if (dont_stop) { + /* unable to resync new process, current process can die now */ + _HA_ATOMIC_DEC(&jobs); + dont_stop = 0; + for (st = peer->tables; st ; st = st->next) + HA_ATOMIC_DEC(&st->table->refcnt); } } - } /* stopping */ + } + else if (peer->statuscode == PEER_SESS_SC_SUCCESSCODE ) { + /* Reset resync timeout during a resync */ + peers->resync_timeout = TICK_ETERNITY; + + /* current peer connection is active and established + * wake up all peer handlers to push remaining local updates */ + for (st = peer->tables; st ; st = st->next) { + if (st->last_pushed != st->table->localupdate) { + appctx_wakeup(peer->appctx); + break; + } + } + } + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); +} - /* 
Release lock for all peers of the section */ - for (ps = peers->remote; ps; ps = ps->next) - HA_SPIN_UNLOCK(PEER_LOCK, &ps->lock); +/* + * Task processing function to manage re-connect, peer session + * tasks wakeup on local update and heartbeat. Let's keep it exported so that it + * resolves in stack traces and "show tasks". + */ +struct task *process_peer_sync(struct task * task, void *context, unsigned int state) +{ + struct peers *peers = context; + + task->expire = TICK_ETERNITY; + + if (!stopping) { + /* Normal case (not soft stop)*/ + __process_running_peer_sync(task, peers, state); + + } + else { + /* soft stop case */ + __process_stopping_peer_sync(task, peers, state); + } /* stopping */ /* Wakeup for re-connect */ return task; @@ -3940,7 +3969,7 @@ static int peers_dump_head(struct buffer *msg, struct appctx *appctx, struct pee peers, tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900, tm.tm_hour, tm.tm_min, tm.tm_sec, - peers->id, peers->disabled, peers->flags, + peers->id, peers->disabled, HA_ATOMIC_LOAD(&peers->flags), peers->resync_timeout ? tick_is_expired(peers->resync_timeout, now_ms) ? "<PAST>" : human_time(TICKS_TO_MS(peers->resync_timeout - now_ms), @@ -3966,12 +3995,14 @@ static int peers_dump_peer(struct buffer *msg, struct appctx *appctx, struct pee struct stream *peer_s; struct shared_table *st; - addr_to_str(&peer->addr, pn, sizeof pn); - chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d last_status=%s", + addr_to_str(&peer->srv->addr, pn, sizeof pn); + chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d app_state=%s learn_state=%s last_status=%s", peer, peer->id, peer->local ? "local" : "remote", peer->appctx ? 
"active" : "inactive", - pn, get_host_port(&peer->addr), + pn, peer->srv->svc_port, + peer_app_state_str(peer->appstate), + peer_learn_state_str(peer->learnstate), statuscode_str(peer->statuscode)); chunk_appendf(msg, " last_hdshk=%s\n", @@ -40,31 +40,30 @@ static struct list pools __read_mostly = LIST_HEAD_INIT(pools); int mem_poison_byte __read_mostly = 'P'; int pool_trim_in_progress = 0; uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */ -#ifdef DEBUG_FAIL_ALLOC +#if defined(DEBUG_FAIL_ALLOC) && (DEBUG_FAIL_ALLOC > 0) POOL_DBG_FAIL_ALLOC | #endif -#ifdef DEBUG_DONT_SHARE_POOLS +#if defined(DEBUG_DONT_SHARE_POOLS) && (DEBUG_DONT_SHARE_POOLS > 0) POOL_DBG_DONT_MERGE | #endif -#ifdef DEBUG_POOL_INTEGRITY +#if defined(DEBUG_POOL_INTEGRITY) && (DEBUG_POOL_INTEGRITY > 0) POOL_DBG_COLD_FIRST | -#endif -#ifdef DEBUG_POOL_INTEGRITY POOL_DBG_INTEGRITY | #endif -#ifdef CONFIG_HAP_NO_GLOBAL_POOLS +#if defined(CONFIG_HAP_NO_GLOBAL_POOLS) POOL_DBG_NO_GLOBAL | #endif -#if defined(DEBUG_NO_POOLS) || defined(DEBUG_UAF) +#if defined(DEBUG_NO_POOLS) && (DEBUG_NO_POOLS > 0) POOL_DBG_NO_CACHE | #endif -#if defined(DEBUG_POOL_TRACING) +#if defined(DEBUG_POOL_TRACING) && (DEBUG_POOL_TRACING > 0) POOL_DBG_CALLER | #endif -#if defined(DEBUG_MEMORY_POOLS) +#if defined(DEBUG_MEMORY_POOLS) && (DEBUG_MEMORY_POOLS > 0) POOL_DBG_TAG | #endif -#if defined(DEBUG_UAF) +#if defined(DEBUG_UAF) && (DEBUG_UAF > 0) + POOL_DBG_NO_CACHE | POOL_DBG_UAF | #endif 0; @@ -497,7 +496,7 @@ void pool_check_pattern(struct pool_cache_head *pch, struct pool_head *pool, str u = ptr[ofs++]; while (ofs < size / sizeof(*ptr)) { if (unlikely(ptr[ofs] != u)) { - pool_inspect_item("cache corruption detected", pool, item, caller); + pool_inspect_item("cache corruption detected", pool, item, caller, ofs * sizeof(*ptr)); ABORT_NOW(); } ofs++; @@ -962,8 +961,12 @@ void pool_destroy_all() } } -/* carefully inspects an item upon fatal error and emit diagnostics */ -void pool_inspect_item(const char *msg, struct 
pool_head *pool, const void *item, const void *caller) +/* carefully inspects an item upon fatal error and emit diagnostics. + * If ofs < 0, no hint is provided regarding the content location. However if + * ofs >= 0, then we also try to inspect around that place where corruption + * was detected. + */ +void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller, ssize_t ofs) { const struct pool_head *the_pool = NULL; @@ -980,6 +983,11 @@ void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item " pool: %p ('%s', size %u, real %u, users %u)\n", item, pool, pool->name, pool->size, pool->alloc_sz, pool->users); + if (ofs >= 0) { + chunk_printf(&trash, "Contents around first corrupted address relative to pool item:.\n"); + dump_area_with_syms(&trash, item, item + ofs, NULL, NULL, NULL); + } + if (pool_debugging & POOL_DBG_TAG) { const void **pool_mark; struct pool_head *ph; @@ -1015,51 +1023,16 @@ void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item } if (!the_pool) { - const char *start, *end, *p; + chunk_appendf(&trash, + "Tag does not match any other pool.\n"); pool_mark = (const void **)(((char *)item) + pool->size); - chunk_appendf(&trash, - "Tag does not match any other pool.\n" - "Contents around address %p+%lu=%p:\n", - item, (ulong)((const void*)pool_mark - (const void*)item), - pool_mark); - - /* dump in word-sized blocks */ - start = (const void *)(((uintptr_t)pool_mark - 32) & -sizeof(void*)); - end = (const void *)(((uintptr_t)pool_mark + 32 + sizeof(void*) - 1) & -sizeof(void*)); - - while (start < end) { - dump_addr_and_bytes(&trash, " ", start, sizeof(void*)); - chunk_strcat(&trash, " ["); - for (p = start; p < start + sizeof(void*); p++) { - if (!may_access(p)) - chunk_strcat(&trash, "*"); - else if (isprint((unsigned char)*p)) - chunk_appendf(&trash, "%c", *p); - else - chunk_strcat(&trash, "."); - } - - if (may_access(start)) - tag = *(const void **)start; - 
else - tag = NULL; - - if (tag == pool) { - /* the pool can often be there so let's detect it */ - chunk_appendf(&trash, "] [pool:%s", pool->name); - } - else if (tag) { - /* print pointers that resolve to a symbol */ - size_t back_data = trash.data; - chunk_strcat(&trash, "] ["); - if (!resolve_sym_name(&trash, NULL, tag)) - trash.data = back_data; - } - - chunk_strcat(&trash, "]\n"); - start = p; - } + if (resolve_sym_name(&trash, "Resolving the tag as a pool_free() location: ", *pool_mark)) + chunk_appendf(&trash, "\n"); + else + chunk_appendf(&trash, " (no match).\n"); + + dump_area_with_syms(&trash, item, pool_mark, pool, "pool", pool->name); } } } diff --git a/src/proto_quic.c b/src/proto_quic.c index 899cffe..93a24af 100644 --- a/src/proto_quic.c +++ b/src/proto_quic.c @@ -277,7 +277,7 @@ int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct s int quic_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; struct conn_src *src; @@ -301,67 +301,12 @@ int quic_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - fd = conn->handle.fd = sock_create_server_socket(conn); - - if (fd == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). 
Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is ok, perform protocol specific settings */ /* allow specific binding : * - server-specific at first * - proxy-specific next @@ -762,7 +707,7 @@ static int quic_alloc_dghdlrs(void) quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(*quic_cid_trees)); if (!quic_cid_trees) { - ha_alert("Failed to allocate global CIDs trees.\n"); + ha_alert("Failed to allocate global quic CIDs trees.\n"); return 0; } diff --git a/src/proto_rhttp.c b/src/proto_rhttp.c index 452ee32..a6fc955 100644 --- a/src/proto_rhttp.c +++ b/src/proto_rhttp.c @@ -13,6 +13,7 @@ #include <haproxy/proxy.h> #include <haproxy/sample.h> #include <haproxy/server.h> +#include 
<haproxy/session.h> #include <haproxy/sock.h> #include <haproxy/ssl_sock.h> #include <haproxy/task.h> @@ -33,6 +34,7 @@ struct protocol proto_rhttp = { .listen = rhttp_bind_listener, .enable = rhttp_enable_listener, .disable = rhttp_disable_listener, + .suspend = rhttp_suspend_listener, .add = default_add_listener, .unbind = rhttp_unbind_receiver, .resume = default_resume_listener, @@ -54,11 +56,20 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr { struct connection *conn = conn_new(srv); struct sockaddr_storage *bind_addr = NULL; + struct session *sess = NULL; if (!conn) goto err; HA_ATOMIC_INC(&th_ctx->nb_rhttp_conns); + /* session origin is only set after reversal. This ensures fetches + * will be functional only after reversal, in particular src/dst. + */ + sess = session_new(l->bind_conf->frontend, l, NULL); + if (!sess) + goto err; + + conn_set_owner(conn, sess, conn_session_free); conn_set_reverse(conn, &l->obj_type); if (alloc_bind_address(&bind_addr, srv, srv->proxy, NULL) != SRV_STATUS_OK) @@ -71,6 +82,14 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr *conn->dst = srv->addr; set_host_port(conn->dst, srv->svc_port); + conn->send_proxy_ofs = 0; + if (srv->pp_opts) { + conn->flags |= CO_FL_SEND_PROXY; + conn->send_proxy_ofs = 1; /* must compute size */ + } + + /* TODO support SOCKS4 */ + if (conn_prepare(conn, protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) goto err; @@ -81,7 +100,7 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr if (srv->ssl_ctx.sni) { struct sample *sni_smp = NULL; /* TODO remove NULL session which can cause crash depending on the SNI sample expr used. 
*/ - sni_smp = sample_fetch_as_type(srv->proxy, NULL, NULL, + sni_smp = sample_fetch_as_type(srv->proxy, sess, NULL, SMP_OPT_DIR_REQ | SMP_OPT_FINAL, srv->ssl_ctx.sni, SMP_T_STR); if (smp_make_safe(sni_smp)) @@ -89,21 +108,35 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr } #endif /* USE_OPENSSL */ + /* The CO_FL_SEND_PROXY flag may have been set by the connect method, + * if so, add our handshake pseudo-XPRT now. + */ + if (conn->flags & CO_FL_HANDSHAKE) { + if (xprt_add_hs(conn) < 0) + goto err; + } + if (conn_xprt_start(conn) < 0) goto err; if (!srv->use_ssl || (!srv->ssl_ctx.alpn_str && !srv->ssl_ctx.npn_str) || srv->mux_proto) { - if (conn_install_mux_be(conn, NULL, NULL, NULL) < 0) + if (conn_install_mux_be(conn, NULL, sess, NULL) < 0) goto err; } - /* Not expected here. */ - BUG_ON((conn->flags & CO_FL_HANDSHAKE)); return conn; err: + if (l->rx.rhttp.state != LI_PRECONN_ST_ERR) { + send_log(l->bind_conf->frontend, LOG_ERR, + "preconnect %s::%s: Error on conn allocation.\n", + l->bind_conf->frontend->id, l->bind_conf->rhttp_srvname); + l->rx.rhttp.state = LI_PRECONN_ST_ERR; + } + + /* No need to free session as conn.destroy_cb will take care of it. */ if (conn) { conn_stop_tracking(conn); conn_xprt_shutw(conn); @@ -284,11 +317,12 @@ int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen) /* Retrieve the first thread usable for this listener. 
*/ mask = listener->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled); - task_tid = my_ffsl(mask) + ha_tgroup_info[listener->rx.bind_tgroup].base; + task_tid = my_ffsl(mask) - 1 + ha_tgroup_info[listener->rx.bind_tgroup].base; if (!(task = task_new_on(task_tid))) { snprintf(errmsg, errlen, "Out of memory."); goto err; } + task->process = rhttp_process; task->context = listener; listener->rx.rhttp.task = task; @@ -363,6 +397,13 @@ int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen) return ERR_ALERT | ERR_FATAL; } +/* Do not support "disable frontend" for rhttp protocol. */ +int rhttp_suspend_listener(struct listener *l) +{ + send_log(l->bind_conf->frontend, LOG_ERR, "cannot disable a reverse-HTTP listener.\n"); + return -1; +} + void rhttp_enable_listener(struct listener *l) { if (l->rx.rhttp.state < LI_PRECONN_ST_INIT) { @@ -372,7 +413,7 @@ void rhttp_enable_listener(struct listener *l) l->rx.rhttp.state = LI_PRECONN_ST_INIT; } - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_INIT); } void rhttp_disable_listener(struct listener *l) @@ -437,10 +478,10 @@ void rhttp_unbind_receiver(struct listener *l) int rhttp_set_affinity(struct connection *conn, int new_tid) { - /* Explicitely disable connection thread migration on accept. Indeed, + /* Explicitly disable connection thread migration on accept. Indeed, * it's unsafe to move a connection with its FD to another thread. Note * that active reverse task thread migration should be sufficient to - * ensure repartition of reversed connections accross listener threads. + * ensure repartition of reversed connections across listener threads. 
*/ return -1; } @@ -452,7 +493,7 @@ int rhttp_accepting_conn(const struct receiver *rx) INITCALL1(STG_REGISTER, protocol_register, &proto_rhttp); -/* perform minimal intializations */ +/* perform minimal initializations */ static void init_rhttp() { int i; diff --git a/src/proto_tcp.c b/src/proto_tcp.c index 45ce27f..63be775 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -265,7 +265,7 @@ int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct so int tcp_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; struct conn_src *src; @@ -298,68 +298,14 @@ int tcp_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - fd = conn->handle.fd = sock_create_server_socket(conn); - if (fd == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1 || - (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is OK, continue with protocol specific settings */ if (be->options & PR_O_TCP_SRV_KA) { setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)); diff --git a/src/proto_udp.c b/src/proto_udp.c index 9855974..7308e98 100644 --- a/src/proto_udp.c +++ b/src/proto_udp.c @@ -155,6 +155,26 @@ int udp_bind_listener(struct listener *listener, char *errmsg, int errlen) if (global.tune.frontend_sndbuf) setsockopt(listener->rx.fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf)); + if (listener->rx.flags & RX_F_PASS_PKTINFO) { + /* set IP_PKTINFO to retrieve destination address on recv */ + switch (listener->rx.addr.ss_family) { + case AF_INET: +#if defined(IP_PKTINFO) + setsockopt(listener->rx.fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one)); +#elif defined(IP_RECVDSTADDR) + setsockopt(listener->rx.fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one)); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + break; + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + setsockopt(listener->rx.fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); +#endif + break; + default: + break; + 
} + } + listener_set_state(listener, LI_LISTEN); udp_return: diff --git a/src/proto_uxst.c b/src/proto_uxst.c index 7988e00..7828e31 100644 --- a/src/proto_uxst.c +++ b/src/proto_uxst.c @@ -219,7 +219,7 @@ static int uxst_suspend_receiver(struct receiver *rx) */ static int uxst_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; @@ -239,65 +239,12 @@ static int uxst_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - if ((fd = conn->handle.fd = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is ok, continue with protocol specific settings */ if (global.tune.server_sndbuf) setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf)); diff --git a/src/protocol.c b/src/protocol.c index 25ed6b7..399835a 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -157,13 +157,13 @@ int protocol_bind_all(int verbose) struct proxy *px = listener->bind_conf->frontend; if (lerr & ERR_ALERT) - ha_alert("Binding [%s:%d] for %s %s: %s\n", + ha_alert("Binding [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, errmsg); + proxy_type_str(px), px->id, proto->name, errmsg); else if (lerr & ERR_WARN) - ha_warning("Binding [%s:%d] for %s %s: %s\n", + ha_warning("Binding [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, errmsg); + proxy_type_str(px), px->id, proto->name, errmsg); } if (lerr != ERR_NONE) ha_free(&errmsg); @@ -183,13 +183,13 @@ int protocol_bind_all(int verbose) struct proxy *px = listener->bind_conf->frontend; if (lerr & ERR_ALERT) - ha_alert("Starting [%s:%d] for %s %s: %s\n", + 
ha_alert("Starting [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, msg); + proxy_type_str(px), px->id, proto->name, msg); else if (lerr & ERR_WARN) - ha_warning("Starting [%s:%d] for %s %s: %s\n", + ha_warning("Starting [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, msg); + proxy_type_str(px), px->id, proto->name, msg); } if (lerr & ERR_ABORT) break; diff --git a/src/proxy.c b/src/proxy.c index 19e6c4b..f1d9d7a 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -29,6 +29,7 @@ #include <haproxy/fd.h> #include <haproxy/filters.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/http_ana.h> #include <haproxy/http_htx.h> #include <haproxy/http_ext.h> @@ -177,18 +178,6 @@ void free_stick_rules(struct list *rules) } } -static void free_logformat_list(struct list *lfs) -{ - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, lfs, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } -} - void free_server_rules(struct list *srules) { struct server_rule *srule, *sruleb; @@ -196,7 +185,7 @@ void free_server_rules(struct list *srules) list_for_each_entry_safe(srule, sruleb, srules, list) { LIST_DELETE(&srule->list); free_acl_cond(srule->cond); - free_logformat_list(&srule->expr); + lf_expr_deinit(&srule->expr); free(srule->file); free(srule); } @@ -235,34 +224,17 @@ void free_proxy(struct proxy *p) #if defined(CONFIG_HAP_TRANSPARENT) free(p->conn_src.bind_hdr_name); #endif - if (p->conf.logformat_string != default_http_log_format && - p->conf.logformat_string != default_tcp_log_format && - p->conf.logformat_string != clf_http_log_format && - p->conf.logformat_string != default_https_log_format && - p->conf.logformat_string != httpclient_log_format) - free(p->conf.logformat_string); - - free(p->conf.lfs_file); - 
free(p->conf.uniqueid_format_string); istfree(&p->header_unique_id); - free(p->conf.uif_file); if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP) free(p->lbprm.map.srv); - if (p->mode == PR_MODE_SYSLOG) - free(p->lbprm.log.srv); - - if (p->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(p->conf.logformat_sd_string); - free(p->conf.lfsd_file); - - free(p->conf.error_logformat_string); - free(p->conf.elfs_file); list_for_each_entry_safe(cond, condb, &p->mon_fail_cond, list) { LIST_DELETE(&cond->list); free_acl_cond(cond); } + guid_remove(&p->guid); + EXTRA_COUNTERS_FREE(p->extra_counters_fe); EXTRA_COUNTERS_FREE(p->extra_counters_be); @@ -278,7 +250,7 @@ void free_proxy(struct proxy *p) LIST_DELETE(&rule->list); free_acl_cond(rule->cond); if (rule->dynamic) - free_logformat_list(&rule->be.expr); + lf_expr_deinit(&rule->be.expr); free(rule->file); free(rule); } @@ -293,10 +265,10 @@ void free_proxy(struct proxy *p) free_logger(log); } - free_logformat_list(&p->logformat); - free_logformat_list(&p->logformat_sd); - free_logformat_list(&p->format_unique_id); - free_logformat_list(&p->logformat_error); + lf_expr_deinit(&p->logformat); + lf_expr_deinit(&p->logformat_sd); + lf_expr_deinit(&p->format_unique_id); + lf_expr_deinit(&p->logformat_error); free_act_rules(&p->tcp_req.inspect_rules); free_act_rules(&p->tcp_rep.inspect_rules); @@ -344,6 +316,7 @@ void free_proxy(struct proxy *p) srv_free_params(&p->defsrv); list_for_each_entry_safe(l, l_next, &p->conf.listeners, by_fe) { + guid_remove(&l->guid); LIST_DELETE(&l->by_fe); LIST_DELETE(&l->by_bind); free(l->name); @@ -363,6 +336,7 @@ void free_proxy(struct proxy *p) free(bind_conf->arg); free(bind_conf->settings.interface); LIST_DELETE(&bind_conf->by_fe); + free(bind_conf->guid_prefix); free(bind_conf->rhttp_srvname); free(bind_conf); } @@ -1041,6 +1015,33 @@ static int proxy_parse_tcpka_intvl(char **args, int section, struct proxy *proxy } #endif +static int proxy_parse_guid(char **args, int 
section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *guid; + char *guid_err = NULL; + + if (curpx->cap & PR_CAP_DEF) { + ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, line, args[0]); + return -1; + } + + if (!*args[1]) { + memprintf(err, "'%s' : expects an argument", args[0]); + return -1; + } + + guid = args[1]; + if (guid_insert(&curpx->obj_type, guid, &guid_err)) { + memprintf(err, "'%s': %s", args[0], guid_err); + ha_free(&guid_err); + return -1; + } + + return 0; +} + /* This function inserts proxy <px> into the tree of known proxies (regular * ones or defaults depending on px->cap & PR_CAP_DEF). The proxy's name is * used as the storing key so it must already have been initialized. @@ -1275,50 +1276,6 @@ struct server *findserver(const struct proxy *px, const char *name) { return target; } -/* - * This function finds a server with matching "<puid> x <rid>" within - * selected proxy <px>. - * Using the combination of proxy-uid + revision id ensures that the function - * will either return the server we're expecting or NULL if it has been removed - * from the proxy. - */ -struct server *findserver_unique_id(const struct proxy *px, int puid, uint32_t rid) { - - struct server *cursrv; - - if (!px) - return NULL; - - for (cursrv = px->srv; cursrv; cursrv = cursrv->next) { - if (cursrv->puid == puid && cursrv->rid == rid) - return cursrv; - } - - return NULL; -} - -/* - * This function finds a server with matching "<name> x <rid>" within - * selected proxy <px>. - * Using the combination of name + revision id ensures that the function will - * either return the server we're expecting or NULL if it has been removed - * from the proxy. 
- */ -struct server *findserver_unique_name(const struct proxy *px, const char *name, uint32_t rid) { - - struct server *cursrv; - - if (!px) - return NULL; - - for (cursrv = px->srv; cursrv; cursrv = cursrv->next) { - if (!strcmp(cursrv->id, name) && cursrv->rid == rid) - return cursrv; - } - - return NULL; -} - /* This function checks that the designated proxy has no http directives * enabled. It will output a warning if there are, and will fix some of them. * It returns the number of fatal errors encountered. This should be called @@ -1331,35 +1288,25 @@ int proxy_cfg_ensure_no_http(struct proxy *curproxy) ha_warning("cookie will be ignored for %s '%s' (needs 'mode http').\n", proxy_type_str(curproxy), curproxy->id); } - if (isttest(curproxy->monitor_uri)) { - ha_warning("monitor-uri will be ignored for %s '%s' (needs 'mode http').\n", - proxy_type_str(curproxy), curproxy->id); - } if (curproxy->lbprm.algo & BE_LB_NEED_HTTP) { curproxy->lbprm.algo &= ~BE_LB_ALGO; curproxy->lbprm.algo |= BE_LB_ALGO_RR; ha_warning("Layer 7 hash not possible for %s '%s' (needs 'mode http'). Falling back to round robin.\n", proxy_type_str(curproxy), curproxy->id); } - if (curproxy->to_log & (LW_REQ | LW_RESP)) { - curproxy->to_log &= ~(LW_REQ | LW_RESP); - ha_warning("parsing [%s:%d] : HTTP log/header format not usable with %s '%s' (needs 'mode http').\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, - proxy_type_str(curproxy), curproxy->id); - } - if (curproxy->conf.logformat_string == default_http_log_format || - curproxy->conf.logformat_string == clf_http_log_format) { + if (curproxy->logformat.str == default_http_log_format || + curproxy->logformat.str == clf_http_log_format) { /* Note: we don't change the directive's file:line number */ - curproxy->conf.logformat_string = default_tcp_log_format; + curproxy->logformat.str = default_tcp_log_format; ha_warning("parsing [%s:%d] : 'option httplog' not usable with %s '%s' (needs 'mode http'). 
Falling back to 'option tcplog'.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, + curproxy->logformat.conf.file, curproxy->logformat.conf.line, proxy_type_str(curproxy), curproxy->id); } - else if (curproxy->conf.logformat_string == default_https_log_format) { + else if (curproxy->logformat.str == default_https_log_format) { /* Note: we don't change the directive's file:line number */ - curproxy->conf.logformat_string = default_tcp_log_format; + curproxy->logformat.str = default_tcp_log_format; ha_warning("parsing [%s:%d] : 'option httpslog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, + curproxy->logformat.conf.file, curproxy->logformat.conf.line, proxy_type_str(curproxy), curproxy->id); } @@ -1410,14 +1357,11 @@ void init_new_proxy(struct proxy *p) LIST_INIT(&p->tcp_req.l5_rules); MT_LIST_INIT(&p->listener_queue); LIST_INIT(&p->loggers); - LIST_INIT(&p->logformat); - LIST_INIT(&p->logformat_sd); - LIST_INIT(&p->format_unique_id); - LIST_INIT(&p->logformat_error); LIST_INIT(&p->conf.bind); LIST_INIT(&p->conf.listeners); LIST_INIT(&p->conf.errors); LIST_INIT(&p->conf.args.list); + LIST_INIT(&p->conf.lf_checks); LIST_INIT(&p->filter_configs); LIST_INIT(&p->tcpcheck_rules.preset_vars); @@ -1436,6 +1380,8 @@ void init_new_proxy(struct proxy *p) /* Default to only allow L4 retries */ p->retry_type = PR_RE_CONN_FAILED; + guid_init(&p->guid); + p->extra_counters_fe = NULL; p->extra_counters_be = NULL; @@ -1460,29 +1406,12 @@ void proxy_preset_defaults(struct proxy *defproxy) defproxy->options2 |= PR_O2_INDEPSTR; defproxy->max_out_conns = MAX_SRV_LIST; - defproxy->defsrv.check.inter = DEF_CHKINTR; - defproxy->defsrv.check.fastinter = 0; - defproxy->defsrv.check.downinter = 0; - defproxy->defsrv.agent.inter = DEF_CHKINTR; - defproxy->defsrv.agent.fastinter = 0; - defproxy->defsrv.agent.downinter = 0; - defproxy->defsrv.check.rise = DEF_RISETIME; - defproxy->defsrv.check.fall = 
DEF_FALLTIME; - defproxy->defsrv.agent.rise = DEF_AGENT_RISETIME; - defproxy->defsrv.agent.fall = DEF_AGENT_FALLTIME; - defproxy->defsrv.check.port = 0; - defproxy->defsrv.agent.port = 0; - defproxy->defsrv.maxqueue = 0; - defproxy->defsrv.minconn = 0; - defproxy->defsrv.maxconn = 0; - defproxy->defsrv.max_reuse = -1; - defproxy->defsrv.max_idle_conns = -1; - defproxy->defsrv.pool_purge_delay = 5000; - defproxy->defsrv.slowstart = 0; - defproxy->defsrv.onerror = DEF_HANA_ONERR; - defproxy->defsrv.consecutive_errors_limit = DEF_HANA_ERRLIMIT; - defproxy->defsrv.uweight = defproxy->defsrv.iweight = 1; - LIST_INIT(&defproxy->defsrv.pp_tlvs); + srv_settings_init(&defproxy->defsrv); + + lf_expr_init(&defproxy->logformat); + lf_expr_init(&defproxy->logformat_sd); + lf_expr_init(&defproxy->format_unique_id); + lf_expr_init(&defproxy->logformat_error); defproxy->email_alert.level = LOG_ALERT; defproxy->load_server_state_from_file = PR_SRV_STATE_FILE_UNSPEC; @@ -1554,27 +1483,16 @@ void proxy_free_defaults(struct proxy *defproxy) h = h_next; } - if (defproxy->conf.logformat_string != default_http_log_format && - defproxy->conf.logformat_string != default_tcp_log_format && - defproxy->conf.logformat_string != clf_http_log_format && - defproxy->conf.logformat_string != default_https_log_format) { - ha_free(&defproxy->conf.logformat_string); - } - - if (defproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - ha_free(&defproxy->conf.logformat_sd_string); + lf_expr_deinit(&defproxy->logformat); + lf_expr_deinit(&defproxy->logformat_sd); + lf_expr_deinit(&defproxy->logformat_error); + lf_expr_deinit(&defproxy->format_unique_id); list_for_each_entry_safe(log, logb, &defproxy->loggers, list) { LIST_DEL_INIT(&log->list); free_logger(log); } - ha_free(&defproxy->conf.uniqueid_format_string); - ha_free(&defproxy->conf.error_logformat_string); - ha_free(&defproxy->conf.lfs_file); - ha_free(&defproxy->conf.lfsd_file); - ha_free(&defproxy->conf.uif_file); - 
ha_free(&defproxy->conf.elfs_file); chunk_destroy(&defproxy->log_tag); free_email_alert(defproxy); @@ -1652,6 +1570,7 @@ void proxy_unref_defaults(struct proxy *px) */ struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg) { + uint last_change; struct proxy *curproxy; if ((curproxy = calloc(1, sizeof(*curproxy))) == NULL) { @@ -1660,7 +1579,13 @@ struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg) } init_new_proxy(curproxy); - curproxy->last_change = ns_to_sec(now_ns); + + last_change = ns_to_sec(now_ns); + if (cap & PR_CAP_FE) + curproxy->fe_counters.last_change = last_change; + if (cap & PR_CAP_BE) + curproxy->be_counters.last_change = last_change; + curproxy->id = strdup(name); curproxy->cap = cap; @@ -1815,39 +1740,9 @@ static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defpro if (defproxy->defbe.name) curproxy->defbe.name = strdup(defproxy->defbe.name); - /* get either a pointer to the logformat string or a copy of it */ - curproxy->conf.logformat_string = defproxy->conf.logformat_string; - if (curproxy->conf.logformat_string && - curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - curproxy->conf.logformat_string = strdup(curproxy->conf.logformat_string); - - if (defproxy->conf.lfs_file) { - curproxy->conf.lfs_file = strdup(defproxy->conf.lfs_file); - curproxy->conf.lfs_line = defproxy->conf.lfs_line; - } - - /* get either a pointer to the logformat string for RFC5424 structured-data or a copy of it */ - curproxy->conf.logformat_sd_string = defproxy->conf.logformat_sd_string; - if (curproxy->conf.logformat_sd_string && - curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - curproxy->conf.logformat_sd_string = strdup(curproxy->conf.logformat_sd_string); - - if 
(defproxy->conf.lfsd_file) { - curproxy->conf.lfsd_file = strdup(defproxy->conf.lfsd_file); - curproxy->conf.lfsd_line = defproxy->conf.lfsd_line; - } - - curproxy->conf.error_logformat_string = defproxy->conf.error_logformat_string; - if (curproxy->conf.error_logformat_string) - curproxy->conf.error_logformat_string = strdup(curproxy->conf.error_logformat_string); - - if (defproxy->conf.elfs_file) { - curproxy->conf.elfs_file = strdup(defproxy->conf.elfs_file); - curproxy->conf.elfs_line = defproxy->conf.elfs_line; - } + lf_expr_dup(&defproxy->logformat, &curproxy->logformat); + lf_expr_dup(&defproxy->logformat_sd, &curproxy->logformat_sd); + lf_expr_dup(&defproxy->logformat_error, &curproxy->logformat_error); } if (curproxy->cap & PR_CAP_BE) { @@ -1877,17 +1772,10 @@ static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defpro LIST_APPEND(&curproxy->loggers, &node->list); } - curproxy->conf.uniqueid_format_string = defproxy->conf.uniqueid_format_string; - if (curproxy->conf.uniqueid_format_string) - curproxy->conf.uniqueid_format_string = strdup(curproxy->conf.uniqueid_format_string); + lf_expr_dup(&defproxy->format_unique_id, &curproxy->format_unique_id); chunk_dup(&curproxy->log_tag, &defproxy->log_tag); - if (defproxy->conf.uif_file) { - curproxy->conf.uif_file = strdup(defproxy->conf.uif_file); - curproxy->conf.uif_line = defproxy->conf.uif_line; - } - /* copy default header unique id */ if (isttest(defproxy->header_unique_id)) { const struct ist copy = istdup(defproxy->header_unique_id); @@ -2008,11 +1896,11 @@ void proxy_cond_disable(struct proxy *p) */ if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP || p->mode == PR_MODE_SYSLOG) && !(p->cap & PR_CAP_INT)) ha_warning("Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n", - p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->id, p->fe_counters.cum_conn, p->be_counters.cum_sess); if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP) && !(p->cap & PR_CAP_INT)) 
send_log(p, LOG_WARNING, "Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n", - p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->id, p->fe_counters.cum_conn, p->be_counters.cum_sess); if (p->table && p->table->size && p->table->sync_task) task_wakeup(p->table->sync_task, TASK_WOKEN_MSG); @@ -2099,7 +1987,7 @@ struct task *manage_proxy(struct task *t, void *context, unsigned int state) goto out; if (p->fe_sps_lim && - (wait = next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0))) { + (wait = next_event_delay(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0))) { /* we're blocking because a limit was reached on the number of * requests/s on the frontend. We want to re-check ASAP, which * means in 1 ms before estimated expiration date, because the @@ -2722,6 +2610,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_LISTEN, "clitcpka-intvl", proxy_parse_tcpka_intvl }, { CFG_LISTEN, "srvtcpka-intvl", proxy_parse_tcpka_intvl }, #endif + { CFG_LISTEN, "guid", proxy_parse_guid }, { 0, NULL, NULL }, }}; @@ -2819,9 +2708,8 @@ static void dump_server_addr(const struct sockaddr_storage *addr, char *addr_str * ->px, the proxy's id ->only_pxid, the server's pointer from ->sv, and the * choice of what to dump from ->show_conn. */ -static int dump_servers_state(struct stconn *sc) +static int dump_servers_state(struct appctx *appctx) { - struct appctx *appctx = __sc_appctx(sc); struct show_srv_ctx *ctx = appctx->svcctx; struct proxy *px = ctx->px; struct server *srv; @@ -2842,7 +2730,7 @@ static int dump_servers_state(struct stconn *sc) dump_server_addr(&srv->check.addr, srv_check_addr); dump_server_addr(&srv->agent.addr, srv_agent_addr); - srv_time_since_last_change = ns_to_sec(now_ns) - srv->last_change; + srv_time_since_last_change = ns_to_sec(now_ns) - srv->counters.last_change; bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0; srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 
1 : 0; @@ -2904,7 +2792,6 @@ static int dump_servers_state(struct stconn *sc) static int cli_io_handler_servers_state(struct appctx *appctx) { struct show_srv_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct proxy *curproxy; if (ctx->state == SHOW_SRV_HEAD) { @@ -2928,7 +2815,7 @@ static int cli_io_handler_servers_state(struct appctx *appctx) curproxy = ctx->px; /* servers are only in backends */ if ((curproxy->cap & PR_CAP_BE) && !(curproxy->cap & PR_CAP_INT)) { - if (!dump_servers_state(sc)) + if (!dump_servers_state(appctx)) return 0; } /* only the selected proxy is dumped */ @@ -3263,13 +3150,8 @@ static int cli_parse_show_errors(char **args, char *payload, struct appctx *appc static int cli_io_handler_show_errors(struct appctx *appctx) { struct show_errors_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); extern const char *monthname[12]; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); if (!ctx->px) { @@ -3399,7 +3281,7 @@ static int cli_io_handler_show_errors(struct appctx *appctx) newline = ctx->bol; newptr = dump_text_line(&trash, es->buf, global.tune.bufsize, es->buf_len, &newline, ctx->ptr); if (newptr == ctx->ptr) { - sc_need_room(sc, 0); + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); goto cant_send_unlock; } diff --git a/src/qmux_http.c b/src/qmux_http.c index edf26b1..092eb15 100644 --- a/src/qmux_http.c +++ b/src/qmux_http.c @@ -78,6 +78,15 @@ size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); htx = htxbuf(buf); + + /* Extra care required for HTTP/1 responses without Content-Length nor + * chunked encoding. In this case, shutw callback will be use to signal + * the end of the message. QC_SF_UNKNOWN_PL_LENGTH is set to prevent a + * RESET_STREAM emission in this case. 
+ */ + if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) + qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; + eom = (htx->flags & HTX_FL_EOM); ret = qcs->qcc->app_ops->snd_buf(qcs, buf, count); *fin = (eom && !b_data(buf)); @@ -86,23 +95,3 @@ size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, return ret; } - -/* QUIC MUX snd_buf reset. HTX data stored in <buf> of length <count> will be - * cleared. This can be used when data should not be transmitted any longer. - * - * Return the size in bytes of cleared data. - */ -size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count) -{ - struct htx *htx; - - TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - - htx = htx_from_buf(buf); - htx_reset(htx); - htx_to_buf(htx, buf); - - TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - - return count; -} diff --git a/src/qmux_trace.c b/src/qmux_trace.c index b213ed4..254ebb0 100644 --- a/src/qmux_trace.c +++ b/src/qmux_trace.c @@ -76,15 +76,15 @@ static void qmux_trace(enum trace_level level, uint64_t mask, if (qcc->conn->handle.qc) chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc); - chunk_appendf(&trace_buf, " md=%llu/%llu/%llu", - (ullong)qcc->rfctl.md, (ullong)qcc->tx.offsets, (ullong)qcc->tx.sent_offsets); + chunk_appendf(&trace_buf, " md=%llu/%llu", + (ullong)qcc->tx.fc.limit, (ullong)qcc->tx.fc.off_real); if (qcs) { chunk_appendf(&trace_buf, " qcs=%p .id=%llu .st=%s", qcs, (ullong)qcs->id, qcs_st_to_str(qcs->st)); - chunk_appendf(&trace_buf, " msd=%llu/%llu/%llu", - (ullong)qcs->tx.msd, (ullong)qcs->tx.offset, (ullong)qcs->tx.sent_offset); + chunk_appendf(&trace_buf, " msd=%llu/%llu", + (ullong)qcs->tx.fc.limit, (ullong)qcs->tx.fc.off_real); } if (mask & QMUX_EV_QCC_NQCS) { diff --git a/src/qpack-dec.c b/src/qpack-dec.c index 7a8726f..5798b08 100644 --- a/src/qpack-dec.c +++ b/src/qpack-dec.c @@ -111,7 +111,7 @@ int qpack_decode_enc(struct buffer *buf, int fin, void *ctx) * connection error of type 
H3_CLOSED_CRITICAL_STREAM. */ if (fin) { - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); return -1; } @@ -144,7 +144,7 @@ int qpack_decode_enc(struct buffer *buf, int fin, void *ctx) * QPACK_ENCODER_STREAM_ERROR. */ if (capacity) { - qcc_set_error(qcs->qcc, QPACK_ENCODER_STREAM_ERROR, 1); + qcc_set_error(qcs->qcc, QPACK_ERR_ENCODER_STREAM_ERROR, 1); return -1; } @@ -171,7 +171,7 @@ int qpack_decode_dec(struct buffer *buf, int fin, void *ctx) * connection error of type H3_CLOSED_CRITICAL_STREAM. */ if (fin) { - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); return -1; } @@ -196,7 +196,7 @@ int qpack_decode_dec(struct buffer *buf, int fin, void *ctx) */ /* For the moment haproxy does not emit dynamic table insertion. */ - qcc_set_error(qcs->qcc, QPACK_DECODER_STREAM_ERROR, 1); + qcc_set_error(qcs->qcc, QPACK_ERR_DECODER_STREAM_ERROR, 1); return -1; } else if (inst & QPACK_DEC_INST_SACK) { @@ -218,12 +218,12 @@ static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit, { *enc_ric = qpack_get_varint(raw, len, 8); if (*len == (uint64_t)-1) - return -QPACK_ERR_RIC; + return -QPACK_RET_RIC; *sign_bit = **raw & 0x8; *db = qpack_get_varint(raw, len, 7); if (*len == (uint64_t)-1) - return -QPACK_ERR_DB; + return -QPACK_RET_DB; return 0; } @@ -234,7 +234,7 @@ static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit, * the end of the list with empty strings as name/value. * * Returns the number of headers inserted into list excluding the end marker. - * In case of error, a negative code QPACK_ERR_* is returned. + * In case of error, a negative code QPACK_RET_* is returned. 
*/ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, struct http_hdr *list, int list_size) @@ -262,7 +262,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, while (len) { if (hdr_idx >= list_size) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TOO_LARGE; + ret = -QPACK_RET_TOO_LARGE; goto out; } @@ -283,7 +283,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 3); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -292,7 +292,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, length = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -300,7 +300,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < length) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -316,7 +316,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * Count (Section 4.5.1), it MUST treat this as a connection error of * type QPACK_DECOMPRESSION_FAILED. 
*/ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } else if (efl_type == QPACK_IFL_WPBI) { /* Indexed field line with post-base index @@ -329,7 +329,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 4); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -344,7 +344,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * Count (Section 4.5.1), it MUST treat this as a connection error of * type QPACK_DECOMPRESSION_FAILED. */ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } else if (efl_type & QPACK_IFL_BIT) { /* Indexed field line */ @@ -356,7 +356,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 6); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -375,7 +375,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * * TODO adjust this when dynamic table support is implemented. */ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } qpack_debug_printf(stderr, " t=%d index=%llu", !!static_tbl, (unsigned long long)index); @@ -391,7 +391,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 4); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -409,7 +409,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * * TODO adjust this when dynamic table support is implemented. 
*/ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } qpack_debug_printf(stderr, " n=%d t=%d index=%llu", !!n, !!static_tbl, (unsigned long long)index); @@ -417,7 +417,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, length = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -429,13 +429,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; goto out; } nlen = huff_dec(raw, length, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -450,7 +450,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < length) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -468,7 +468,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, name_len = qpack_get_varint(&raw, &len, 3); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -477,7 +477,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < name_len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -488,13 +488,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; 
goto out; } nlen = huff_dec(raw, name_len, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -514,7 +514,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, value_len = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -522,7 +522,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < value_len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -533,13 +533,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; goto out; } nlen = huff_dec(raw, value_len, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -561,7 +561,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, */ if (!name.len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_DECOMP; goto out; } @@ -574,7 +574,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (hdr_idx >= list_size) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TOO_LARGE; + ret = -QPACK_RET_TOO_LARGE; goto out; } @@ -586,3 +586,11 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, qpack_debug_printf(stderr, "-- done: ret=%d\n", ret); return ret; } + +/* Convert return value from qpack_decode_fs() to a standard error 
code usable + * in CONNECTION_CLOSE or -1 for an internal error. + */ +int qpack_err_decode(const int value) +{ + return (value == -QPACK_RET_DECOMP) ? QPACK_ERR_DECOMPRESSION_FAILED : -1; +} diff --git a/src/queue.c b/src/queue.c index f20285b..e55bb58 100644 --- a/src/queue.c +++ b/src/queue.c @@ -114,10 +114,10 @@ unsigned int srv_dynamic_maxconn(const struct server *s) s->proxy->beconn * s->maxconn / s->proxy->fullconn); if ((s->cur_state == SRV_ST_STARTING) && - ns_to_sec(now_ns) < s->last_change + s->slowstart && - ns_to_sec(now_ns) >= s->last_change) { + ns_to_sec(now_ns) < s->counters.last_change + s->slowstart && + ns_to_sec(now_ns) >= s->counters.last_change) { unsigned int ratio; - ratio = 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart; + ratio = 100 * (ns_to_sec(now_ns) - s->counters.last_change) / s->slowstart; max = MAX(1, max * ratio / 100); } return max; diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c index 76a62ac..4bd1a7c 100644 --- a/src/quic_cc_cubic.c +++ b/src/quic_cc_cubic.c @@ -1,4 +1,6 @@ +#include <haproxy/global-t.h> #include <haproxy/quic_cc.h> +#include <haproxy/quic_cc_hystart.h> #include <haproxy/quic_trace.h> #include <haproxy/ticks.h> #include <haproxy/trace.h> @@ -79,6 +81,8 @@ struct cubic { * in recovery period) (in ms). */ uint32_t recovery_start_time; + /* HyStart++ state. */ + struct quic_hystart hystart; }; static void quic_cc_cubic_reset(struct quic_cc *cc) @@ -96,6 +100,8 @@ static void quic_cc_cubic_reset(struct quic_cc *cc) c->last_w_max = 0; c->W_est = 0; c->recovery_start_time = 0; + if (global.tune.options & GTUNE_QUIC_CC_HYSTART) + quic_cc_hystart_reset(&c->hystart); TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); } @@ -242,7 +248,7 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked) * Note that K is stored in milliseconds. */ c->K = cubic_root(((c->last_w_max - path->cwnd) << CUBIC_SCALE_FACTOR_SHIFT) / (CUBIC_C_SCALED * path->mtu)); - /* Convert to miliseconds. 
*/ + /* Convert to milliseconds. */ c->K *= 1000; c->W_target = c->last_w_max; } @@ -424,7 +430,25 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev) TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); switch (ev->type) { case QUIC_CC_EVT_ACK: - if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) { + if (global.tune.options & GTUNE_QUIC_CC_HYSTART) { + struct quic_hystart *h = &c->hystart; + unsigned int acked = QUIC_MIN(ev->ack.acked, (uint64_t)HYSTART_LIMIT * path->mtu); + + if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked) + goto out; + + path->cwnd += acked; + path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); + quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt); + if (ev->ack.pn >= h->wnd_end) + h->wnd_end = UINT64_MAX; + if (quic_cc_hystart_may_enter_cs(&c->hystart)) { + /* Exit slow start and enter conservative slow start */ + c->state = QUIC_CC_ST_CS; + goto out; + } + } + else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) { path->cwnd += ev->ack.acked; path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd); } @@ -470,6 +494,69 @@ static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev) TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); } +/* Conservative slow start callback. 
*/ +static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev) +{ + struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc); + + TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); + TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); + + switch (ev->type) { + case QUIC_CC_EVT_ACK: + { + struct cubic *c = quic_cc_priv(cc); + struct quic_hystart *h = &c->hystart; + unsigned int acked = + QUIC_MIN(ev->ack.acked, (uint64_t)HYSTART_LIMIT * path->mtu) / HYSTART_CSS_GROWTH_DIVISOR; + + if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked) + goto out; + + path->cwnd += acked; + path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); + quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt); + if (quic_cc_hystart_may_reenter_ss(h)) { + /* Exit to slow start */ + c->state = QUIC_CC_ST_SS; + goto out; + } + + if (h->css_rnd_count >= HYSTART_CSS_ROUNDS) { + /* Exit to congestion avoidance + * + * RFC 9438 4.10. Slow start + * + * When CUBIC uses HyStart++ [RFC9406], it may exit the first slow start + * without incurring any packet loss and thus _W_max_ is undefined. In + * this special case, CUBIC sets _cwnd_prior = cwnd_ and switches to + * congestion avoidance. It then increases its congestion window size + * using Figure 1, where _t_ is the elapsed time since the beginning of + * the current congestion avoidance stage, _K_ is set to 0, and _W_max_ + * is set to the congestion window size at the beginning of the current + * congestion avoidance stage. 
+ */ + c->last_w_max = path->cwnd; + c->t_epoch = 0; + c->state = QUIC_CC_ST_CA; + } + + break; + } + + case QUIC_CC_EVT_LOSS: + quic_enter_recovery(cc); + break; + case QUIC_CC_EVT_ECN_CE: + /* TODO */ + break; + } + + out: + TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); + TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); +} + /* Recovery period callback */ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) { @@ -507,6 +594,7 @@ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb, + [QUIC_CC_ST_CS] = quic_cc_cubic_cs_cb, [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb, [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb, }; @@ -518,6 +606,17 @@ static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev) return quic_cc_cubic_state_cbs[c->state](cc, ev); } +static void quic_cc_cubic_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ + struct cubic *c = quic_cc_priv(cc); + struct quic_hystart *h = &c->hystart; + + if (c->state != QUIC_CC_ST_SS && c->state != QUIC_CC_ST_CS) + return; + + quic_cc_hystart_start_round(h, pn); +} + static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc) { struct quic_cc_path *path; @@ -538,5 +637,6 @@ struct quic_cc_algo quic_cc_algo_cubic = { .init = quic_cc_cubic_init, .event = quic_cc_cubic_event, .slow_start = quic_cc_cubic_slow_start, + .hystart_start_round = quic_cc_cubic_hystart_start_round, .state_trace = quic_cc_cubic_state_trace, }; diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c index 405b0ba..ca29877 100644 --- a/src/quic_cc_newreno.c +++ b/src/quic_cc_newreno.c @@ -196,6 +196,10 @@ static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc) (unsigned long long)path->loss.nb_lost_pkt); } +static void quic_cc_nr_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ +} + static void 
(*quic_cc_nr_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb, @@ -215,6 +219,7 @@ struct quic_cc_algo quic_cc_algo_nr = { .init = quic_cc_nr_init, .event = quic_cc_nr_event, .slow_start = quic_cc_nr_slow_start, + .hystart_start_round = quic_cc_nr_hystart_start_round, .state_trace = quic_cc_nr_state_trace, }; diff --git a/src/quic_cli.c b/src/quic_cli.c index f237a1f..f0e147c 100644 --- a/src/quic_cli.c +++ b/src/quic_cli.c @@ -3,18 +3,29 @@ #include <haproxy/applet-t.h> #include <haproxy/cli.h> #include <haproxy/list.h> -#include <haproxy/tools.h> +#include <haproxy/mux_quic.h> #include <haproxy/quic_conn-t.h> #include <haproxy/quic_tp.h> +#include <haproxy/tools.h> /* incremented by each "show quic". */ unsigned int qc_epoch = 0; enum quic_dump_format { + QUIC_DUMP_FMT_DEFAULT, /* value used if not explicitly specified. */ + QUIC_DUMP_FMT_ONELINE, - QUIC_DUMP_FMT_FULL, + QUIC_DUMP_FMT_CUST, }; +#define QUIC_DUMP_FLD_TP 0x0001 +#define QUIC_DUMP_FLD_SOCK 0x0002 +#define QUIC_DUMP_FLD_PKTNS 0x0004 +#define QUIC_DUMP_FLD_CC 0x0008 +#define QUIC_DUMP_FLD_MUX 0x0010 +/* Do not forget to update FLD_MASK when adding a new field. */ +#define QUIC_DUMP_FLD_MASK 0x001f + /* appctx context used by "show quic" command */ struct show_quic_ctx { unsigned int epoch; @@ -22,10 +33,24 @@ struct show_quic_ctx { unsigned int thr; int flags; enum quic_dump_format format; + void *ptr; + int fields; }; #define QC_CLI_FL_SHOW_ALL 0x1 /* show closing/draining connections */ +/* Returns the output format for show quic. If specified explicitly use it as + * set. Else format depends if filtering on a single connection instance. If + * true, full format is preferred else oneline. + */ +static enum quic_dump_format cli_show_quic_format(const struct show_quic_ctx *ctx) +{ + if (ctx->format == QUIC_DUMP_FMT_DEFAULT) + return ctx->ptr ? 
QUIC_DUMP_FMT_CUST : QUIC_DUMP_FMT_ONELINE; + else + return ctx->format; +} + static int cli_parse_show_quic(char **args, char *payload, struct appctx *appctx, void *private) { struct show_quic_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); @@ -37,20 +62,87 @@ static int cli_parse_show_quic(char **args, char *payload, struct appctx *appctx ctx->epoch = _HA_ATOMIC_FETCH_ADD(&qc_epoch, 1); ctx->thr = 0; ctx->flags = 0; - ctx->format = QUIC_DUMP_FMT_ONELINE; + ctx->format = QUIC_DUMP_FMT_DEFAULT; + ctx->ptr = 0; + ctx->fields = 0; if (strcmp(args[argc], "oneline") == 0) { - /* format already used as default value */ + ctx->format = QUIC_DUMP_FMT_ONELINE; ++argc; } else if (strcmp(args[argc], "full") == 0) { - ctx->format = QUIC_DUMP_FMT_FULL; + ctx->format = QUIC_DUMP_FMT_CUST; + ctx->fields = QUIC_DUMP_FLD_MASK; ++argc; } + else if (*args[argc]) { + struct ist istarg = ist(args[argc]); + struct ist field = istsplit(&istarg, ','); + + do { + if (isteq(field, ist("tp"))) { + ctx->fields |= QUIC_DUMP_FLD_TP; + } + else if (isteq(field, ist("sock"))) { + ctx->fields |= QUIC_DUMP_FLD_SOCK; + } + else if (isteq(field, ist("pktns"))) { + ctx->fields |= QUIC_DUMP_FLD_PKTNS; + } + else if (isteq(field, ist("cc"))) { + ctx->fields |= QUIC_DUMP_FLD_CC; + } + else if (isteq(field, ist("mux"))) { + ctx->fields |= QUIC_DUMP_FLD_MUX; + } + else { + /* Current argument is comma-separated so it is + * interpreted as a field list but an unknown + * field name has been specified. + */ + if (istarg.len || ctx->fields) { + cli_err(appctx, "Invalid field.\n"); + return 1; + } + + break; + } + + field = istsplit(&istarg, ','); + } while (field.len); + + /* At least one valid field specified, select the associated + * format. Else parse the current argument as a filter. 
+ */ + if (ctx->fields) { + ctx->format = QUIC_DUMP_FMT_CUST; + ++argc; + } + } + + if (*args[argc]) { + struct ist istarg = ist(args[argc]); + + if (istmatchi(istarg, ist("0x"))) { + char *nptr; + ctx->ptr = (void *)strtol(args[argc], &nptr, 16); + if (*nptr) { + cli_err(appctx, "Invalid quic_conn pointer.\n"); + return 1; + } + + if (!ctx->fields) + ctx->fields = QUIC_DUMP_FLD_MASK; - while (*args[argc]) { - if (strcmp(args[argc], "all") == 0) + ++argc; + } + else if (istmatch(istarg, ist("all"))) { ctx->flags |= QC_CLI_FL_SHOW_ALL; + } + else { + cli_err(appctx, "Invalid argument.\n"); + return 1; + } ++argc; } @@ -115,10 +207,8 @@ static void dump_quic_oneline(struct show_quic_ctx *ctx, struct quic_conn *qc) static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) { struct quic_pktns *pktns; - struct eb64_node *node; - struct qc_stream_desc *stream; char bufaddr[INET6_ADDRSTRLEN], bufport[6]; - int expire, i, addnl; + int expire, addnl; unsigned char cid_len; addnl = 0; @@ -137,12 +227,14 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) chunk_appendf(&trash, "\n"); - chunk_appendf(&trash, " loc. TPs:"); - quic_transport_params_dump(&trash, qc, &qc->rx.params); - chunk_appendf(&trash, "\n"); - chunk_appendf(&trash, " rem. TPs:"); - quic_transport_params_dump(&trash, qc, &qc->tx.params); - chunk_appendf(&trash, "\n"); + if (ctx->fields & QUIC_DUMP_FLD_TP) { + chunk_appendf(&trash, " loc. TPs:"); + quic_transport_params_dump(&trash, qc, &qc->rx.params); + chunk_appendf(&trash, "\n"); + chunk_appendf(&trash, " rem. 
TPs:"); + quic_transport_params_dump(&trash, qc, &qc->tx.params); + chunk_appendf(&trash, "\n"); + } /* Connection state */ if (qc->flags & QUIC_FL_CONN_CLOSING) @@ -170,44 +262,50 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) chunk_appendf(&trash, "\n"); /* Socket */ - chunk_appendf(&trash, " fd=%d", qc->fd); - if (qc->local_addr.ss_family == AF_INET || - qc->local_addr.ss_family == AF_INET6) { - addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr)); - port_to_str(&qc->local_addr, bufport, sizeof(bufport)); - chunk_appendf(&trash, " local_addr=%s:%s", bufaddr, bufport); + if (ctx->fields & QUIC_DUMP_FLD_SOCK) { + chunk_appendf(&trash, " fd=%d", qc->fd); + if (qc->local_addr.ss_family == AF_INET || + qc->local_addr.ss_family == AF_INET6) { + addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr)); + port_to_str(&qc->local_addr, bufport, sizeof(bufport)); + chunk_appendf(&trash, " local_addr=%s:%s", bufaddr, bufport); + + addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr)); + port_to_str(&qc->peer_addr, bufport, sizeof(bufport)); + chunk_appendf(&trash, " foreign_addr=%s:%s", bufaddr, bufport); + } - addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr)); - port_to_str(&qc->peer_addr, bufport, sizeof(bufport)); - chunk_appendf(&trash, " foreign_addr=%s:%s", bufaddr, bufport); + chunk_appendf(&trash, "\n"); } - chunk_appendf(&trash, "\n"); - /* Packet number spaces information */ - pktns = qc->ipktns; - if (pktns) { - chunk_appendf(&trash, " [initl] rx.ackrng=%-6zu tx.inflight=%-6zu", - pktns->rx.arngs.sz, pktns->tx.in_flight); - } + if (ctx->fields & QUIC_DUMP_FLD_PKTNS) { + pktns = qc->ipktns; + if (pktns) { + chunk_appendf(&trash, " [initl] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } - pktns = qc->hpktns; - if (pktns) { - chunk_appendf(&trash, " [hndshk] rx.ackrng=%-6zu tx.inflight=%-6zu\n", - pktns->rx.arngs.sz, pktns->tx.in_flight); - } + pktns = qc->hpktns; + if (pktns) { + 
chunk_appendf(&trash, " [hndshk] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } - pktns = qc->apktns; - if (pktns) { - chunk_appendf(&trash, " [01rtt] rx.ackrng=%-6zu tx.inflight=%-6zu\n", - pktns->rx.arngs.sz, pktns->tx.in_flight); + pktns = qc->apktns; + if (pktns) { + chunk_appendf(&trash, " [01rtt] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } } - chunk_appendf(&trash, " srtt=%-4u rttvar=%-4u rttmin=%-4u ptoc=%-4u cwnd=%-6llu" - " mcwnd=%-6llu sentpkts=%-6llu lostpkts=%-6llu reorderedpkts=%-6llu\n", - qc->path->loss.srtt, qc->path->loss.rtt_var, - qc->path->loss.rtt_min, qc->path->loss.pto_count, (ullong)qc->path->cwnd, - (ullong)qc->path->mcwnd, (ullong)qc->cntrs.sent_pkt, (ullong)qc->path->loss.nb_lost_pkt, (ullong)qc->path->loss.nb_reordered_pkt); + if (ctx->fields & QUIC_DUMP_FLD_CC) { + chunk_appendf(&trash, " srtt=%-4u rttvar=%-4u rttmin=%-4u ptoc=%-4u cwnd=%-6llu" + " mcwnd=%-6llu sentpkts=%-6llu lostpkts=%-6llu reorderedpkts=%-6llu\n", + qc->path->loss.srtt, qc->path->loss.rtt_var, + qc->path->loss.rtt_min, qc->path->loss.pto_count, (ullong)qc->path->cwnd, + (ullong)qc->path->mcwnd, (ullong)qc->cntrs.sent_pkt, (ullong)qc->path->loss.nb_lost_pkt, (ullong)qc->path->loss.nb_reordered_pkt); + } if (qc->cntrs.dropped_pkt) { chunk_appendf(&trash, " droppkts=%-6llu", qc->cntrs.dropped_pkt); @@ -256,23 +354,8 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) if (addnl) chunk_appendf(&trash, "\n"); - /* Streams */ - node = eb64_first(&qc->streams_by_id); - i = 0; - while (node) { - stream = eb64_entry(node, struct qc_stream_desc, by_id); - node = eb64_next(node); - - chunk_appendf(&trash, " | stream=%-8llu", (unsigned long long)stream->by_id.key); - chunk_appendf(&trash, " off=%-8llu ack=%-8llu", - (unsigned long long)stream->buf_offset, - (unsigned long long)stream->ack_offset); - - if (!(++i % 3)) { - chunk_appendf(&trash, "\n"); - i = 0; - } - } + if 
(ctx->fields & QUIC_DUMP_FLD_MUX && qc->mux_state == QC_MUX_READY) + qcc_show_quic(qc->qcc); chunk_appendf(&trash, "\n"); } @@ -280,7 +363,6 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) static int cli_io_handler_dump_quic(struct appctx *appctx) { struct show_quic_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct quic_conn *qc; thread_isolate(); @@ -288,16 +370,6 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) if (ctx->thr >= global.nbthread) goto done; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last stream being dumped. - */ - if (!LIST_ISEMPTY(&ctx->bref.users)) - LIST_DEL_INIT(&ctx->bref.users); - goto done; - } - chunk_reset(&trash); if (!LIST_ISEMPTY(&ctx->bref.users)) { @@ -309,7 +381,7 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns.n; /* Print legend for oneline format. */ - if (ctx->format == QUIC_DUMP_FMT_ONELINE) { + if (cli_show_quic_format(ctx) == QUIC_DUMP_FMT_ONELINE) { chunk_appendf(&trash, "# conn/frontend state " "in_flight infl_p lost_p " "Local Address Foreign Address " @@ -322,11 +394,12 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) int done = 0; if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].quic_conns) { - /* If closing connections requested through "all", move - * to quic_conns_clo list after browsing quic_conns. - * Else move directly to the next quic_conns thread. + /* If closing connections requested through "all" or a + * specific connection is filtered, move to + * quic_conns_clo list after browsing quic_conns. Else + * move directly to the next quic_conns thread. 
*/ - if (ctx->flags & QC_CLI_FL_SHOW_ALL) { + if (ctx->flags & QC_CLI_FL_SHOW_ALL || ctx->ptr) { ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns_clo.n; continue; } @@ -344,6 +417,10 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) qc = LIST_ELEM(ctx->bref.ref, struct quic_conn *, el_th_ctx); if ((int)(qc->qc_epoch - ctx->epoch) > 0) done = 1; + + /* Skip to next element if filter on a different connection. */ + if (ctx->ptr && ctx->ptr != qc) + done = 1; } if (done) { @@ -355,13 +432,17 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) continue; } - switch (ctx->format) { - case QUIC_DUMP_FMT_FULL: + switch (cli_show_quic_format(ctx)) { + case QUIC_DUMP_FMT_CUST: dump_quic_full(ctx, qc); break; case QUIC_DUMP_FMT_ONELINE: dump_quic_oneline(ctx, qc); break; + + case QUIC_DUMP_FMT_DEFAULT: + /* An explicit format must be returned by cli_show_quic_format(). */ + ABORT_NOW(); } if (applet_putchk(appctx, &trash) == -1) { @@ -371,6 +452,10 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) } ctx->bref.ref = qc->el_th_ctx.n; + + /* If filtered connection displayed, show quic can be stopped early. 
*/ + if (ctx->ptr) + goto done; } done: @@ -395,7 +480,7 @@ static void cli_release_show_quic(struct appctx *appctx) } static struct cli_kw_list cli_kws = {{ }, { - { { "show", "quic", NULL }, "show quic [oneline|full] [all] : display quic connections status", cli_parse_show_quic, cli_io_handler_dump_quic, cli_release_show_quic }, + { { "show", "quic", NULL }, "show quic [<format>] [<filter>] : display quic connections status", cli_parse_show_quic, cli_io_handler_dump_quic, cli_release_show_quic }, {{},} }}; diff --git a/src/quic_conn.c b/src/quic_conn.c index 5233496..6cc1d38 100644 --- a/src/quic_conn.c +++ b/src/quic_conn.c @@ -355,7 +355,7 @@ int qc_h3_request_reject(struct quic_conn *qc, uint64_t id) int ret = 0; struct quic_frame *ss, *rs; struct quic_enc_level *qel = qc->ael; - const uint64_t app_error_code = H3_REQUEST_REJECTED; + const uint64_t app_error_code = H3_ERR_REQUEST_REJECTED; TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc); @@ -544,10 +544,10 @@ int quic_build_post_handshake_frames(struct quic_conn *qc) goto leave; } - /* QUIC connection packet handler task (post handshake) */ struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state) { + struct list send_list = LIST_HEAD_INIT(send_list); struct quic_conn *qc = context; struct quic_enc_level *qel; @@ -592,9 +592,13 @@ struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int sta goto out; } + if (!qel_need_sending(qel, qc)) + goto out; + /* XXX TODO: how to limit the list frames to send */ - if (!qc_send_app_pkts(qc, &qel->pktns->tx.frms)) { - TRACE_DEVEL("qc_send_app_pkts() failed", QUIC_EV_CONN_IO_CB, qc); + qel_register_send(&send_list, qel, &qel->pktns->tx.frms); + if (!qc_send(qc, 0, &send_list)) { + TRACE_DEVEL("qc_send() failed", QUIC_EV_CONN_IO_CB, qc); goto out; } @@ -741,9 +745,9 @@ static struct quic_conn_closed *qc_new_cc_conn(struct quic_conn *qc) /* QUIC connection packet handler task. 
*/ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) { - int ret; struct quic_conn *qc = context; - struct buffer *buf = NULL; + struct list send_list = LIST_HEAD_INIT(send_list); + struct quic_enc_level *qel; int st; struct tasklet *tl = (struct tasklet *)t; @@ -753,8 +757,8 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) TRACE_PROTO("connection state", QUIC_EV_CONN_IO_CB, qc, &st); if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) { - HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY); qc_ssl_provide_all_quic_data(qc, qc->xprt_ctx); + HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY); } /* Retranmissions */ @@ -771,11 +775,6 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) if (!qc_treat_rx_pkts(qc)) goto out; - if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) { - tasklet_wakeup(tl); - goto out; - } - if (qc->flags & QUIC_FL_CONN_TO_KILL) { TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_PHPKTS, qc); goto out; @@ -797,34 +796,21 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) } } - buf = qc_get_txb(qc); - if (!buf) - goto out; - - if (b_data(buf) && !qc_purge_txbuf(qc, buf)) - goto out; - - /* Currently buf cannot be non-empty at this stage. Even if a previous - * sendto() has failed it is emptied to simulate packet emission and - * rely on QUIC lost detection to try to emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); + /* Insert each QEL into sending list if needed. */ + list_for_each_entry(qel, &qc->qel_list, list) { + if (qel_need_sending(qel, qc)) + qel_register_send(&send_list, qel, &qel->pktns->tx.frms); + } - ret = qc_prep_hpkts(qc, buf, NULL); - if (ret == -1) { - qc_txb_release(qc); + /* Skip sending if no QEL with frames to sent. 
*/ + if (LIST_ISEMPTY(&send_list)) goto out; - } - if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); + if (!qc_send(qc, 0, &send_list)) { + TRACE_DEVEL("qc_send() failed", QUIC_EV_CONN_IO_CB, qc); goto out; } - qc_txb_release(qc); - out: /* Release the Handshake encryption level and packet number space if * the Handshake is confirmed and if there is no need to send @@ -1818,7 +1804,14 @@ int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new qc_detach_th_ctx_list(qc, 0); node = eb64_first(qc->cids); - BUG_ON(!node || eb64_next(node)); /* One and only one CID must be present before affinity rebind. */ + /* One and only one CID must be present before affinity rebind. + * + * This could be triggered fairly easily if tasklet is scheduled just + * before thread migration for post-handshake state to generate new + * CIDs. In this case, QUIC_FL_CONN_IO_TO_REQUEUE should be used + * instead of tasklet_wakeup(). + */ + BUG_ON(!node || eb64_next(node)); conn_id = eb64_entry(node, struct quic_connection_id, seq_num); /* At this point no connection was accounted for yet on this diff --git a/src/quic_fctl.c b/src/quic_fctl.c new file mode 100644 index 0000000..b797e55 --- /dev/null +++ b/src/quic_fctl.c @@ -0,0 +1,96 @@ +#include <haproxy/quic_fctl.h> + +#include <haproxy/api.h> + +void qfctl_init(struct quic_fctl *fctl, uint64_t limit) +{ + fctl->limit = limit; + fctl->off_real = 0; + fctl->off_soft = 0; +} + +/* Returns true if real limit is blocked for <fctl> flow control instance. + * This happens if it is equal than current max value. + */ +int qfctl_rblocked(const struct quic_fctl *fctl) +{ + /* Real limit must never be exceeded. */ + BUG_ON(fctl->off_real > fctl->limit); + return fctl->off_real == fctl->limit; +} + +/* Returns true if soft limit is blocked for <fctl> flow control instance. + * This happens if it is equal or greater than current max value. 
+ */ +int qfctl_sblocked(const struct quic_fctl *fctl) +{ + return fctl->off_soft >= fctl->limit; +} + +/* Set a new <val> maximum value for <fctl> flow control instance. If current + * offset is already equal or more, the new value is ignored. Additionally, + * <unblocked_soft> and <unblocked_real> can be used as output parameters to + * detect if the current update result in one or both of these offsets to be + * unblocked. + * + * Returns true if max is incremented else false. + */ +int qfctl_set_max(struct quic_fctl *fctl, uint64_t val, + int *out_unblock_soft, int *out_unblock_real) +{ + int unblock_soft = 0, unblock_real = 0; + int ret = 0; + + if (fctl->limit < val) { + if (fctl->off_soft >= fctl->limit && fctl->off_soft < val) + unblock_soft = 1; + if (fctl->off_real == fctl->limit && fctl->off_real < val) + unblock_real = 1; + + fctl->limit = val; + ret = 1; + } + + if (out_unblock_soft) + *out_unblock_soft = unblock_soft; + if (out_unblock_real) + *out_unblock_real = unblock_real; + + return ret; +} + +/* Increment real offset of <fctl> flow control instance by <diff>. This cannot + * exceed <fctl> limit. + * + * Returns true if limit is reached after increment. + */ +int qfctl_rinc(struct quic_fctl *fctl, uint64_t diff) +{ + /* Real limit must never be exceeded. */ + BUG_ON(fctl->off_real + diff > fctl->limit); + fctl->off_real += diff; + + return fctl->off_real == fctl->limit; +} + +/* Increment soft offset of <fctl> flow control instance by <diff>. This cannot + * be done if <fctl> limit was already reached. + * + * Returns true if limit is reached after increment. + */ +int qfctl_sinc(struct quic_fctl *fctl, uint64_t diff) +{ + /* Soft limit must not be incremented if already in excess. */ + BUG_ON(qfctl_sblocked(fctl)); + fctl->off_soft += diff; + + return fctl->off_soft >= fctl->limit; +} + +/* Return the remaining offset before reaching <fctl> limit. */ +uint64_t qfctl_rcap(const struct quic_fctl *fctl) +{ + /* Real limit must never be exceeded. 
*/ + BUG_ON(fctl->off_real > fctl->limit); + return fctl->limit - fctl->off_real; +} diff --git a/src/quic_retry.c b/src/quic_retry.c index 1c58e5e..f1d55b8 100644 --- a/src/quic_retry.c +++ b/src/quic_retry.c @@ -60,7 +60,7 @@ static int quic_generate_retry_token_aad(unsigned char *aad, unsigned char *p; p = aad; - *(uint32_t *)p = htonl(version); + write_u32(p, htonl(version)); p += sizeof version; p += quic_saddr_cpy(p, addr); memcpy(p, cid->data, cid->len); diff --git a/src/quic_rx.c b/src/quic_rx.c index 585c71a..d5b45d6 100644 --- a/src/quic_rx.c +++ b/src/quic_rx.c @@ -506,6 +506,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc, qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn); ev.ack.acked = pkt->in_flight_len; ev.ack.time_sent = pkt->time_sent; + ev.ack.pn = pkt->pn_node.key; quic_cc_event(&qc->path->cc, &ev); LIST_DEL_INIT(&pkt->list); quic_tx_packet_refdec(pkt); @@ -740,8 +741,11 @@ static int qc_handle_crypto_frm(struct quic_conn *qc, goto leave; } - if (ncb_data(ncbuf, 0)) + /* Reschedule with TASK_HEAVY if CRYPTO data ready for decoding. */ + if (ncb_data(ncbuf, 0)) { HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY); + tasklet_wakeup(qc->wait_event.tasklet); + } done: ret = 1; @@ -860,6 +864,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, case QUIC_FT_PING: break; case QUIC_FT_ACK: + case QUIC_FT_ACK_ECN: { unsigned int rtt_sample; rtt_sample = UINT_MAX; @@ -902,6 +907,9 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, if (!qc_handle_crypto_frm(qc, &frm.crypto, pkt, qel, &fast_retrans)) goto leave; break; + case QUIC_FT_NEW_TOKEN: + /* TODO */ + break; case QUIC_FT_STREAM_8 ... 
QUIC_FT_STREAM_F: { struct qf_stream *strm_frm = &frm.stream; @@ -974,7 +982,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, break; case QUIC_FT_RETIRE_CONNECTION_ID: { - struct quic_cid_tree *tree; + struct quic_cid_tree *tree __maybe_unused; struct quic_connection_id *conn_id = NULL; if (!qc_handle_retire_connection_id_frm(qc, &frm, &pkt->dcid, &conn_id)) @@ -1001,6 +1009,10 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, } break; } + case QUIC_FT_PATH_CHALLENGE: + case QUIC_FT_PATH_RESPONSE: + /* TODO */ + break; case QUIC_FT_CONNECTION_CLOSE: case QUIC_FT_CONNECTION_CLOSE_APP: /* Increment the error counters */ @@ -1040,8 +1052,8 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, qc->state = QUIC_HS_ST_CONFIRMED; break; default: - TRACE_ERROR("unknosw frame type", QUIC_EV_CONN_PRSHPKT, qc); - goto leave; + /* Unknown frame type must be rejected by qc_parse_frm(). */ + ABORT_NOW(); } } @@ -1144,50 +1156,6 @@ static void qc_rm_hp_pkts(struct quic_conn *qc, struct quic_enc_level *el) TRACE_LEAVE(QUIC_EV_CONN_ELRMHP, qc); } -/* Process all the CRYPTO frame at <el> encryption level. This is the - * responsibility of the called to ensure there exists a CRYPTO data - * stream for this level. - * Return 1 if succeeded, 0 if not. 
- */ -int qc_treat_rx_crypto_frms(struct quic_conn *qc, struct quic_enc_level *el, - struct ssl_sock_ctx *ctx) -{ - int ret = 0; - struct ncbuf *ncbuf; - struct quic_cstream *cstream = el->cstream; - ncb_sz_t data; - - TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); - - BUG_ON(!cstream); - ncbuf = &cstream->rx.ncbuf; - if (ncb_is_null(ncbuf)) - goto done; - - /* TODO not working if buffer is wrapping */ - while ((data = ncb_data(ncbuf, 0))) { - const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf); - - if (!qc_ssl_provide_quic_data(&el->cstream->rx.ncbuf, el->level, - ctx, cdata, data)) - goto leave; - - cstream->rx.offset += data; - TRACE_DEVEL("buffered crypto data were provided to TLS stack", - QUIC_EV_CONN_PHPKTS, qc, el); - } - - done: - ret = 1; - leave: - if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) { - TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, el); - quic_free_ncbuf(ncbuf); - } - TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc); - return ret; -} - /* Check if it's possible to remove header protection for packets related to * encryption level <qel>. If <qel> is NULL, assume it's false. * @@ -1317,15 +1285,6 @@ int qc_treat_rx_pkts(struct quic_conn *qc) qel->pktns->flags |= QUIC_FL_PKTNS_NEW_LARGEST_PN; } - if (qel->cstream) { - struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf; - - if (!ncb_is_null(ncbuf) && ncb_data(ncbuf, 0)) { - /* Some in order CRYPTO data were bufferized. */ - HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY); - } - } - /* Release the Initial encryption level and packet number space. 
*/ if ((qc->flags & QUIC_FL_CONN_IPKTNS_DCD) && qel == qc->iel) { qc_enc_level_free(qc, &qc->iel); @@ -1503,7 +1462,7 @@ static inline int quic_read_uint32(uint32_t *val, if (end - *buf < sizeof *val) return 0; - *val = ntohl(*(uint32_t *)*buf); + *val = ntohl(read_u32(*buf)); *buf += sizeof *val; return 1; @@ -1728,6 +1687,9 @@ static struct quic_conn *quic_rx_pkt_retrieve_conn(struct quic_rx_packet *pkt, } } else if (!qc) { + /* Stateless Reset sent even for Long header packets as haproxy + * emits stateless_reset_token in its TPs. + */ TRACE_PROTO("RX non Initial pkt without connection", QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version); if (!send_stateless_reset(l, &dgram->saddr, pkt)) TRACE_ERROR("stateless reset not sent", QUIC_EV_CONN_LPKT, qc); diff --git a/src/quic_sock.c b/src/quic_sock.c index f796513..a3f3768 100644 --- a/src/quic_sock.c +++ b/src/quic_sock.c @@ -29,6 +29,7 @@ #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/pool.h> +#include <haproxy/protocol-t.h> #include <haproxy/proto_quic.h> #include <haproxy/proxy-t.h> #include <haproxy/quic_cid.h> @@ -337,8 +338,8 @@ static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *rbuf return prev; } -/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of - * length <len>. +/* Receive a single message from datagram socket <fd>. Data are placed in <out> + * buffer of length <len>. * * Datagram addresses will be returned via the next arguments. <from> will be * the peer address and <to> the reception one. 
Note that <to> can only be @@ -393,6 +394,11 @@ static ssize_t quic_recv(int fd, void *out, size_t len, if (ret < 0) goto end; + if (unlikely(port_is_restricted((struct sockaddr_storage *)from, HA_PROTO_QUIC))) { + ret = -1; + goto end; + } + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { switch (cmsg->cmsg_level) { case IPPROTO_IP: @@ -566,6 +572,86 @@ void quic_conn_sock_fd_iocb(int fd) TRACE_LEAVE(QUIC_EV_CONN_RCV, qc); } +static void cmsg_set_saddr(struct msghdr *msg, struct cmsghdr **cmsg, + struct sockaddr_storage *saddr) +{ + struct cmsghdr *c; +#ifdef IP_PKTINFO + struct in_pktinfo *in; +#endif /* IP_PKTINFO */ +#ifdef IPV6_RECVPKTINFO + struct in6_pktinfo *in6; +#endif /* IPV6_RECVPKTINFO */ + size_t sz = 0; + + /* First determine size of ancillary data depending on the system support. */ + switch (saddr->ss_family) { + case AF_INET: +#if defined(IP_PKTINFO) + sz = sizeof(struct in_pktinfo); +#elif defined(IP_RECVDSTADDR) + sz = sizeof(struct in_addr); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + break; + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + sz = sizeof(struct in6_pktinfo); +#endif /* IPV6_RECVPKTINFO */ + break; + default: + break; + } + + /* Size is null if system does not support send source address setting. */ + if (!sz) + return; + + /* Set first msg_controllen to be able to use CMSG_* macros. */ + msg->msg_controllen += CMSG_SPACE(sz); + + *cmsg = !(*cmsg) ? 
CMSG_FIRSTHDR(msg) : CMSG_NXTHDR(msg, *cmsg); + ALREADY_CHECKED(*cmsg); + c = *cmsg; + c->cmsg_len = CMSG_LEN(sz); + + switch (saddr->ss_family) { + case AF_INET: + c->cmsg_level = IPPROTO_IP; +#if defined(IP_PKTINFO) + c->cmsg_type = IP_PKTINFO; + in = (struct in_pktinfo *)CMSG_DATA(c); + in->ipi_ifindex = 0; + in->ipi_addr.s_addr = 0; + memcpy(&in->ipi_spec_dst, + &((struct sockaddr_in *)saddr)->sin_addr, + sizeof(struct in_addr)); +#elif defined(IP_RECVDSTADDR) + c->cmsg_type = IP_SENDSRCADDR; + memcpy(CMSG_DATA(c), + &((struct sockaddr_in *)saddr)->sin_addr, + sizeof(struct in_addr)); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + + break; + + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + c->cmsg_level = IPPROTO_IPV6; + c->cmsg_type = IPV6_PKTINFO; + in6 = (struct in6_pktinfo *)CMSG_DATA(c); + in6->ipi6_ifindex = 0; + memcpy(&in6->ipi6_addr, + &((struct sockaddr_in6 *)saddr)->sin6_addr, + sizeof(struct in6_addr)); +#endif /* IPV6_RECVPKTINFO */ + + break; + + default: + break; + } +} + /* Send a datagram stored into <buf> buffer with <sz> as size. * The caller must ensure there is at least <sz> bytes in this buffer. 
* @@ -581,106 +667,58 @@ int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz, int flags) { ssize_t ret; + struct msghdr msg; + struct iovec vec; + struct cmsghdr *cmsg __maybe_unused = NULL; - do { - if (qc_test_fd(qc)) { - if (!fd_send_ready(qc->fd)) - return 0; - - ret = send(qc->fd, b_peek(buf, b_head_ofs(buf)), sz, - MSG_DONTWAIT | MSG_NOSIGNAL); - } -#if defined(IP_PKTINFO) || defined(IP_RECVDSTADDR) || defined(IPV6_RECVPKTINFO) - else if (is_addr(&qc->local_addr)) { - struct msghdr msg = { 0 }; - struct iovec vec; - struct cmsghdr *cmsg; + union { #ifdef IP_PKTINFO - struct in_pktinfo in; + char buf[CMSG_SPACE(sizeof(struct in_pktinfo))]; #endif /* IP_PKTINFO */ #ifdef IPV6_RECVPKTINFO - struct in6_pktinfo in6; + char buf6[CMSG_SPACE(sizeof(struct in6_pktinfo))]; #endif /* IPV6_RECVPKTINFO */ - union { -#ifdef IP_PKTINFO - char buf[CMSG_SPACE(sizeof(in))]; -#endif /* IP_PKTINFO */ -#ifdef IPV6_RECVPKTINFO - char buf6[CMSG_SPACE(sizeof(in6))]; -#endif /* IPV6_RECVPKTINFO */ - char bufaddr[CMSG_SPACE(sizeof(struct in_addr))]; - struct cmsghdr align; - } u; - - vec.iov_base = b_peek(buf, b_head_ofs(buf)); - vec.iov_len = sz; - msg.msg_name = &qc->peer_addr; - msg.msg_namelen = get_addr_len(&qc->peer_addr); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - - switch (qc->local_addr.ss_family) { - case AF_INET: -#if defined(IP_PKTINFO) - memset(&in, 0, sizeof(in)); - memcpy(&in.ipi_spec_dst, - &((struct sockaddr_in *)&qc->local_addr)->sin_addr, - sizeof(struct in_addr)); - - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IP; - cmsg->cmsg_type = IP_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); - memcpy(CMSG_DATA(cmsg), &in, sizeof(in)); -#elif defined(IP_RECVDSTADDR) - msg.msg_control = u.bufaddr; - msg.msg_controllen = sizeof(u.bufaddr); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IP; - cmsg->cmsg_type = IP_SENDSRCADDR; - cmsg->cmsg_len 
= CMSG_LEN(sizeof(struct in_addr)); - memcpy(CMSG_DATA(cmsg), - &((struct sockaddr_in *)&qc->local_addr)->sin_addr, - sizeof(struct in_addr)); -#endif /* IP_PKTINFO || IP_RECVDSTADDR */ - break; + char bufaddr[CMSG_SPACE(sizeof(struct in_addr))]; + struct cmsghdr align; + } ancillary_data; - case AF_INET6: -#ifdef IPV6_RECVPKTINFO - memset(&in6, 0, sizeof(in6)); - memcpy(&in6.ipi6_addr, - &((struct sockaddr_in6 *)&qc->local_addr)->sin6_addr, - sizeof(struct in6_addr)); - - msg.msg_control = u.buf6; - msg.msg_controllen = sizeof(u.buf6); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IPV6; - cmsg->cmsg_type = IPV6_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); - memcpy(CMSG_DATA(cmsg), &in6, sizeof(in6)); -#endif /* IPV6_RECVPKTINFO */ - break; + vec.iov_base = b_peek(buf, b_head_ofs(buf)); + vec.iov_len = sz; - default: - break; - } + /* man 2 sendmsg + * + * The msg_name field is used on an unconnected socket to specify the + * target address for a datagram. It points to a buffer containing the + * address; the msg_namelen field should be set to the size of the + * address. For a connected socket, these fields should be specified + * as NULL and 0, respectively. + */ + if (!qc_test_fd(qc)) { + msg.msg_name = &qc->peer_addr; + msg.msg_namelen = get_addr_len(&qc->peer_addr); + } + else { + msg.msg_name = NULL; + msg.msg_namelen = 0; + } - ret = sendmsg(qc->li->rx.fd, &msg, - MSG_DONTWAIT|MSG_NOSIGNAL); - } -#endif /* IP_PKTINFO || IP_RECVDSTADDR || IPV6_RECVPKTINFO */ - else { - ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz, - MSG_DONTWAIT|MSG_NOSIGNAL, - (struct sockaddr *)&qc->peer_addr, - get_addr_len(&qc->peer_addr)); - } + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + if (qc_test_fd(qc) && !fd_send_ready(qc->fd)) + return 0; + + /* Set source address when using listener socket if possible. 
*/ + if (!qc_test_fd(qc) && is_addr(&qc->local_addr)) { + msg.msg_control = ancillary_data.bufaddr; + cmsg_set_saddr(&msg, &cmsg, &qc->local_addr); + } + + do { + ret = sendmsg(qc_fd(qc), &msg, MSG_DONTWAIT|MSG_NOSIGNAL); } while (ret < 0 && errno == EINTR); if (ret < 0) { @@ -740,7 +778,7 @@ int qc_rcv_buf(struct quic_conn *qc) max_sz = params->max_udp_payload_size; do { - if (!b_alloc(&buf)) + if (!b_alloc(&buf, DB_MUX_RX)) break; /* TODO subscribe for memory again available. */ b_reset(&buf); @@ -967,18 +1005,15 @@ void qc_want_recv(struct quic_conn *qc) struct quic_accept_queue *quic_accept_queues; /* Install <qc> on the queue ready to be accepted. The queue task is then woken - * up. If <qc> accept is already scheduled or done, nothing is done. + * up. */ void quic_accept_push_qc(struct quic_conn *qc) { struct quic_accept_queue *queue = &quic_accept_queues[tid]; struct li_per_thread *lthr = &qc->li->per_thr[ti->ltid]; - /* early return if accept is already in progress/done for this - * connection - */ - if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED) - return; + /* A connection must only be accepted once per instance. 
*/ + BUG_ON(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED); BUG_ON(MT_LIST_INLIST(&qc->accept_list)); HA_ATOMIC_INC(&qc->li->rx.quic_curr_accept); diff --git a/src/quic_ssl.c b/src/quic_ssl.c index 85b6717..66eb68e 100644 --- a/src/quic_ssl.c +++ b/src/quic_ssl.c @@ -2,7 +2,6 @@ #include <haproxy/ncbuf.h> #include <haproxy/proxy.h> #include <haproxy/quic_conn.h> -#include <haproxy/quic_rx.h> #include <haproxy/quic_sock.h> #include <haproxy/quic_ssl.h> #include <haproxy/quic_tls.h> @@ -442,24 +441,25 @@ int ssl_quic_initial_ctx(struct bind_conf *bind_conf) ctx = SSL_CTX_new(TLS_server_method()); bind_conf->initial_ctx = ctx; + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); SSL_CTX_set_options(ctx, options); SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); -#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME -# if defined(HAVE_SSL_CLIENT_HELLO_CB) -# if defined(SSL_OP_NO_ANTI_REPLAY) if (bind_conf->ssl_conf.early_data) { - SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY); -# if defined(USE_QUIC_OPENSSL_COMPAT) || defined(OPENSSL_IS_AWSLC) - ha_warning("Binding [%s:%d] for %s %s: 0-RTT is not supported in limited QUIC compatibility mode, ignored.\n", +#if !defined(HAVE_SSL_0RTT_QUIC) + ha_warning("Binding [%s:%d] for %s %s: 0-RTT with QUIC is not supported by this SSL library, ignored.\n", bind_conf->file, bind_conf->line, proxy_type_str(bind_conf->frontend), bind_conf->frontend->id); -# else +#else + SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY); SSL_CTX_set_max_early_data(ctx, 0xffffffff); -# endif /* ! USE_QUIC_OPENSSL_COMPAT */ +#endif /* ! HAVE_SSL_0RTT_QUIC */ } -# endif /* !SSL_OP_NO_ANTI_REPLAY */ + +#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME +# if defined(HAVE_SSL_CLIENT_HELLO_CB) SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL); SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk); # else /* ! 
HAVE_SSL_CLIENT_HELLO_CB */ @@ -502,10 +502,10 @@ static forceinline void qc_ssl_dump_errors(struct connection *conn) * Remaining parameter are there for debugging purposes. * Return 1 if succeeded, 0 if not. */ -int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, - enum ssl_encryption_level_t level, - struct ssl_sock_ctx *ctx, - const unsigned char *data, size_t len) +static int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, + enum ssl_encryption_level_t level, + struct ssl_sock_ctx *ctx, + const unsigned char *data, size_t len) { #ifdef DEBUG_STRICT enum ncb_ret ncb_ret; @@ -557,6 +557,39 @@ int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, ERR_clear_error(); goto leave; } +#if defined(LIBRESSL_VERSION_NUMBER) + else if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) { + /* Some libressl versions emit TLS alerts without making the handshake + * (SSL_do_handshake()) fail. This is at least the case for + * libressl-3.9.0 when forcing the TLS cipher to TLS_AES_128_CCM_SHA256. + */ + TRACE_ERROR("SSL handshake error", QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err); + HA_ATOMIC_INC(&qc->prx_counters->hdshk_fail); + goto leave; + } +#endif + +#if defined(OPENSSL_IS_AWSLC) + /* As a server, if early data is accepted, SSL_do_handshake will + * complete as soon as the ClientHello is processed and server flight sent. + * SSL_write may be used to send half-RTT data. SSL_read will consume early + * data and transition to 1-RTT data as appropriate. Prior to the + * transition, SSL_in_init will report the handshake is still in progress. + * Callers may use it or SSL_in_early_data to defer or reject requests + * as needed. + * (see https://commondatastorage.googleapis.com/chromium-boringssl-docs/ssl.h.html#Early-data) + */ + + /* If we do not returned here, the handshake is considered as completed/confirmed. + * This has as bad side effect to discard the Handshake packet number space, + * so without sending the Handshake level CRYPTO data. 
+ */ + if (SSL_in_early_data(ctx->ssl)) { + TRACE_PROTO("SSL handshake in progrees with early data", + QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err); + goto out; + } +#endif TRACE_PROTO("SSL handshake OK", QUIC_EV_CONN_IO_CB, qc, &state); @@ -572,8 +605,17 @@ int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, if (qc_is_listener(ctx->qc)) { qc->flags |= QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS; qc->state = QUIC_HS_ST_CONFIRMED; - /* The connection is ready to be accepted. */ - quic_accept_push_qc(qc); + + if (!(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)) { + quic_accept_push_qc(qc); + } + else { + /* Connection already accepted if 0-RTT used. + * In this case, schedule quic-conn to ensure + * post-handshake frames are emitted. + */ + tasklet_wakeup(qc->wait_event.tasklet); + } BUG_ON(qc->li->rx.quic_curr_handshake == 0); HA_ATOMIC_DEC(&qc->li->rx.quic_curr_handshake); @@ -636,6 +678,8 @@ int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx) { int ret = 0; struct quic_enc_level *qel; + struct ncbuf *ncbuf; + ncb_sz_t data; TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); list_for_each_entry(qel, &qc->qel_list, list) { @@ -644,8 +688,27 @@ int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx) if (!cstream) continue; - if (!qc_treat_rx_crypto_frms(qc, qel, ctx)) - goto leave; + ncbuf = &cstream->rx.ncbuf; + if (ncb_is_null(ncbuf)) + continue; + + /* TODO not working if buffer is wrapping */ + while ((data = ncb_data(ncbuf, 0))) { + const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf); + + if (!qc_ssl_provide_quic_data(&qel->cstream->rx.ncbuf, qel->level, + ctx, cdata, data)) + goto leave; + + cstream->rx.offset += data; + TRACE_DEVEL("buffered crypto data were provided to TLS stack", + QUIC_EV_CONN_PHPKTS, qc, qel); + } + + if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) { + TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, qel); + quic_free_ncbuf(ncbuf); + } } ret = 1; @@ -696,6 +759,43 @@ static int 
qc_ssl_sess_init(struct quic_conn *qc, SSL_CTX *ssl_ctx, SSL **ssl) return ret; } +#ifdef HAVE_SSL_0RTT_QUIC + +/* Enable early data for <ssl> QUIC TLS session. + * Return 1 if succeeded, 0 if not. + */ +static int qc_set_quic_early_data_enabled(struct quic_conn *qc, SSL *ssl) +{ +#if defined(OPENSSL_IS_AWSLC) + struct quic_transport_params p = {0}; + unsigned char buf[128]; + size_t len; + + /* Apply default values to <p> transport parameters. */ + quic_transport_params_init(&p, 1); + /* The stateless_reset_token transport parameter is not needed. */ + p.with_stateless_reset_token = 0; + len = quic_transport_params_encode(buf, buf + sizeof buf, &p, NULL, 1); + if (!len) { + TRACE_ERROR("quic_transport_params_encode() failed", QUIC_EV_CONN_RWSEC, qc); + return 0; + } + + /* XXX TODO: Should also add the application settings. XXX */ + if (!SSL_set_quic_early_data_context(ssl, buf, len)) { + TRACE_ERROR("SSL_set_quic_early_data_context() failed", QUIC_EV_CONN_RWSEC, qc); + return 0; + } + + SSL_set_early_data_enabled(ssl, 1); +#else + SSL_set_quic_early_data_enabled(ssl, 1); +#endif + + return 1; +} +#endif // HAVE_SSL_0RTT_QUIC + /* Allocate the ssl_sock_ctx from connection <qc>. This creates the tasklet * used to process <qc> received packets. The allocated context is stored in * <qc.xprt_ctx>. 
@@ -731,12 +831,10 @@ int qc_alloc_ssl_sock_ctx(struct quic_conn *qc) if (qc_is_listener(qc)) { if (qc_ssl_sess_init(qc, bc->initial_ctx, &ctx->ssl) == -1) goto err; -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) && !defined(OPENSSL_IS_AWSLC) -#ifndef USE_QUIC_OPENSSL_COMPAT +#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) && defined(HAVE_SSL_0RTT_QUIC) /* Enabling 0-RTT */ - if (bc->ssl_conf.early_data) - SSL_set_quic_early_data_enabled(ctx->ssl, 1); -#endif + if (bc->ssl_conf.early_data && !qc_set_quic_early_data_enabled(qc, ctx->ssl)) + goto err; #endif SSL_set_accept_state(ctx->ssl); diff --git a/src/quic_stats.c b/src/quic_stats.c index 3657f30..9d9b343 100644 --- a/src/quic_stats.c +++ b/src/quic_stats.c @@ -2,7 +2,7 @@ #include <haproxy/quic_stats-t.h> #include <haproxy/stats.h> -static struct name_desc quic_stats[] = { +static struct stat_col quic_stats[] = { [QUIC_ST_RXBUF_FULL] = { .name = "quic_rxbuf_full", .desc = "Total number of cancelled reception due to full receiver buffer" }, [QUIC_ST_DROPPED_PACKET] = { .name = "quic_dropped_pkt", @@ -90,53 +90,154 @@ static struct name_desc quic_stats[] = { struct quic_counters quic_counters; -static void quic_fill_stats(void *data, struct field *stats) +static int quic_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct quic_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[QUIC_ST_RXBUF_FULL] = mkf_u64(FN_COUNTER, counters->rxbuf_full); - stats[QUIC_ST_DROPPED_PACKET] = mkf_u64(FN_COUNTER, counters->dropped_pkt); - stats[QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun); - stats[QUIC_ST_DROPPED_PARSING] = mkf_u64(FN_COUNTER, counters->dropped_parsing); - stats[QUIC_ST_SOCKET_FULL] = mkf_u64(FN_COUNTER, counters->socket_full); - stats[QUIC_ST_SENDTO_ERR] = mkf_u64(FN_COUNTER, counters->sendto_err); - stats[QUIC_ST_SENDTO_ERR_UNKNWN] = mkf_u64(FN_COUNTER, counters->sendto_err_unknown); - stats[QUIC_ST_SENT_PACKET] = mkf_u64(FN_COUNTER, counters->sent_pkt); - stats[QUIC_ST_LOST_PACKET] = mkf_u64(FN_COUNTER, counters->lost_pkt); - stats[QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram); - stats[QUIC_ST_RETRY_SENT] = mkf_u64(FN_COUNTER, counters->retry_sent); - stats[QUIC_ST_RETRY_VALIDATED] = mkf_u64(FN_COUNTER, counters->retry_validated); - stats[QUIC_ST_RETRY_ERRORS] = mkf_u64(FN_COUNTER, counters->retry_error); - stats[QUIC_ST_HALF_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->half_open_conn); - stats[QUIC_ST_HDSHK_FAIL] = mkf_u64(FN_COUNTER, counters->hdshk_fail); - stats[QUIC_ST_STATELESS_RESET_SENT] = mkf_u64(FN_COUNTER, counters->stateless_reset_sent); - /* Special events of interest */ - stats[QUIC_ST_CONN_MIGRATION_DONE] = mkf_u64(FN_COUNTER, counters->conn_migration_done); - /* Transport errors */ - stats[QUIC_ST_TRANSP_ERR_NO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_error); - stats[QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error); - stats[QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused); - stats[QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error); - stats[QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_stream_limit_error); - stats[QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error); - stats[QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error); - stats[QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error); - stats[QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error); - stats[QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit); - stats[QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation); - stats[QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token); - stats[QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error); - stats[QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded); - stats[QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error); - stats[QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_aead_limit_reached); - stats[QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path); - stats[QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error); - stats[QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error); - /* Streams related counters */ - stats[QUIC_ST_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->data_blocked); - stats[QUIC_ST_STREAM_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->stream_data_blocked); - stats[QUIC_ST_STREAMS_BLOCKED_BIDI] = mkf_u64(FN_COUNTER, counters->streams_blocked_bidi); - stats[QUIC_ST_STREAMS_BLOCKED_UNI] = 
mkf_u64(FN_COUNTER, counters->streams_blocked_uni); + for (; current_field < QUIC_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case QUIC_ST_RXBUF_FULL: + metric = mkf_u64(FN_COUNTER, counters->rxbuf_full); + break; + case QUIC_ST_DROPPED_PACKET: + metric = mkf_u64(FN_COUNTER, counters->dropped_pkt); + break; + case QUIC_ST_DROPPED_PACKET_BUFOVERRUN: + metric = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun); + break; + case QUIC_ST_DROPPED_PARSING: + metric = mkf_u64(FN_COUNTER, counters->dropped_parsing); + break; + case QUIC_ST_SOCKET_FULL: + metric = mkf_u64(FN_COUNTER, counters->socket_full); + break; + case QUIC_ST_SENDTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->sendto_err); + break; + case QUIC_ST_SENDTO_ERR_UNKNWN: + metric = mkf_u64(FN_COUNTER, counters->sendto_err_unknown); + break; + case QUIC_ST_SENT_PACKET: + metric = mkf_u64(FN_COUNTER, counters->sent_pkt); + break; + case QUIC_ST_LOST_PACKET: + metric = mkf_u64(FN_COUNTER, counters->lost_pkt); + break; + case QUIC_ST_TOO_SHORT_INITIAL_DGRAM: + metric = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram); + break; + case QUIC_ST_RETRY_SENT: + metric = mkf_u64(FN_COUNTER, counters->retry_sent); + break; + case QUIC_ST_RETRY_VALIDATED: + metric = mkf_u64(FN_COUNTER, counters->retry_validated); + break; + case QUIC_ST_RETRY_ERRORS: + metric = mkf_u64(FN_COUNTER, counters->retry_error); + break; + case QUIC_ST_HALF_OPEN_CONN: + metric = mkf_u64(FN_GAUGE, counters->half_open_conn); + break; + case QUIC_ST_HDSHK_FAIL: + metric = mkf_u64(FN_COUNTER, counters->hdshk_fail); + break; + case QUIC_ST_STATELESS_RESET_SENT: + metric = mkf_u64(FN_COUNTER, counters->stateless_reset_sent); + break; + + /* Special events of interest */ + case QUIC_ST_CONN_MIGRATION_DONE: + metric = mkf_u64(FN_COUNTER, counters->conn_migration_done); + break; + + /* Transport errors */ + case QUIC_ST_TRANSP_ERR_NO_ERROR: + metric = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_no_error); + break; + case QUIC_ST_TRANSP_ERR_INTERNAL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error); + break; + case QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused); + break; + case QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error); + break; + case QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_limit_error); + break; + case QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error); + break; + case QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error); + break; + case QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error); + break; + case QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error); + break; + case QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit); + break; + case QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation); + break; + case QUIC_ST_TRANSP_ERR_INVALID_TOKEN: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token); + break; + case QUIC_ST_TRANSP_ERR_APPLICATION_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error); + break; + case QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded); + break; + case QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error); + break; + case QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED: + metric = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_aead_limit_reached); + break; + case QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path); + break; + case QUIC_ST_TRANSP_ERR_CRYPTO_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error); + break; + case QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error); + break; + + /* Streams related counters */ + case QUIC_ST_DATA_BLOCKED: + metric = mkf_u64(FN_COUNTER, counters->data_blocked); + break; + case QUIC_ST_STREAM_DATA_BLOCKED: + metric = mkf_u64(FN_COUNTER, counters->stream_data_blocked); + break; + case QUIC_ST_STREAMS_BLOCKED_BIDI: + metric = mkf_u64(FN_COUNTER, counters->streams_blocked_bidi); + break; + case QUIC_ST_STREAMS_BLOCKED_UNI: + metric = mkf_u64(FN_COUNTER, counters->streams_blocked_uni); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } struct stats_module quic_stats_module = { diff --git a/src/quic_stream.c b/src/quic_stream.c index a4b984d..e153660 100644 --- a/src/quic_stream.c +++ b/src/quic_stream.c @@ -6,7 +6,7 @@ #include <haproxy/buf.h> #include <haproxy/dynbuf.h> #include <haproxy/list.h> -#include <haproxy/mux_quic-t.h> +#include <haproxy/mux_quic.h> #include <haproxy/pool.h> #include <haproxy/quic_conn.h> #include <haproxy/task.h> @@ -37,10 +37,13 @@ static void qc_stream_buf_free(struct qc_stream_desc *stream, /* notify MUX about available buffers. */ --qc->stream_buf_count; if (qc->mux_state == QC_MUX_READY) { - if (qc->qcc->flags & QC_CF_CONN_FULL) { - qc->qcc->flags &= ~QC_CF_CONN_FULL; - tasklet_wakeup(qc->qcc->wait_event.tasklet); - } + /* notify MUX about available buffers. 
+ * + * TODO several streams may be woken up even if a single buffer + * is available for now. + */ + while (qcc_notify_buf(qc->qcc)) + ; } } @@ -202,11 +205,13 @@ void qc_stream_desc_free(struct qc_stream_desc *stream, int closing) qc->stream_buf_count -= free_count; if (qc->mux_state == QC_MUX_READY) { - /* notify MUX about available buffers. */ - if (qc->qcc->flags & QC_CF_CONN_FULL) { - qc->qcc->flags &= ~QC_CF_CONN_FULL; - tasklet_wakeup(qc->qcc->wait_event.tasklet); - } + /* notify MUX about available buffers. + * + * TODO several streams may be woken up even if a single buffer + * is available for now. + */ + while (qcc_notify_buf(qc->qcc)) + ; } } diff --git a/src/quic_tls.c b/src/quic_tls.c index aa72831..885df6f 100644 --- a/src/quic_tls.c +++ b/src/quic_tls.c @@ -206,8 +206,9 @@ static int quic_conn_enc_level_init(struct quic_conn *qc, if (!qel) goto leave; - LIST_INIT(&qel->retrans); - qel->retrans_frms = NULL; + LIST_INIT(&qel->el_send); + qel->send_frms = NULL; + qel->tx.crypto.bufs = NULL; qel->tx.crypto.nb_buf = 0; qel->cstream = NULL; diff --git a/src/quic_tp.c b/src/quic_tp.c index caf48ce..08d24b2 100644 --- a/src/quic_tp.c +++ b/src/quic_tp.c @@ -171,23 +171,23 @@ static int quic_transport_param_dec_version_info(struct tp_version_information * const unsigned char *end, int server) { size_t tp_len = end - *buf; - const uint32_t *ver, *others; + const unsigned char *ver, *others; /* <tp_len> must be a multiple of sizeof(uint32_t) */ if (tp_len < sizeof tp->chosen || (tp_len & 0x3)) return 0; - tp->chosen = ntohl(*(uint32_t *)*buf); + tp->chosen = ntohl(read_u32(*buf)); /* Must not be null */ if (!tp->chosen) return 0; *buf += sizeof tp->chosen; - others = (const uint32_t *)*buf; + others = *buf; /* Others versions must not be null */ - for (ver = others; ver < (const uint32_t *)end; ver++) { - if (!*ver) + for (ver = others; ver < end; ver += 4) { + if (!read_u32(ver)) return 0; } @@ -195,19 +195,19 @@ static int 
quic_transport_param_dec_version_info(struct tp_version_information * /* TODO: not supported */ return 0; - for (ver = others; ver < (const uint32_t *)end; ver++) { + for (ver = others; ver < end; ver += 4) { if (!tp->negotiated_version) { int i; for (i = 0; i < quic_versions_nb; i++) { - if (ntohl(*ver) == quic_versions[i].num) { + if (ntohl(read_u32(ver)) == quic_versions[i].num) { tp->negotiated_version = &quic_versions[i]; break; } } } - if (preferred_version && ntohl(*ver) == preferred_version->num) { + if (preferred_version && ntohl(read_u32(ver)) == preferred_version->num) { tp->negotiated_version = preferred_version; goto out; } @@ -565,7 +565,7 @@ int quic_transport_params_encode(unsigned char *buf, p->active_connection_id_limit)) return 0; - if (!quic_transport_param_enc_version_info(&pos, end, chosen_version, server)) + if (chosen_version && !quic_transport_param_enc_version_info(&pos, end, chosen_version, server)) return 0; return pos - head; diff --git a/src/quic_tx.c b/src/quic_tx.c index 306b4c2..6d487eb 100644 --- a/src/quic_tx.c +++ b/src/quic_tx.c @@ -88,7 +88,7 @@ static inline void free_quic_tx_packet(struct quic_conn *qc, struct buffer *qc_txb_alloc(struct quic_conn *qc) { struct buffer *buf = &qc->tx.buf; - if (!b_alloc(buf)) + if (!b_alloc(buf, DB_MUX_TX)) return NULL; return buf; @@ -202,104 +202,6 @@ static int qc_may_build_pkt(struct quic_conn *qc, struct list *frms, return 1; } -/* Prepare as much as possible QUIC packets for sending from prebuilt frames - * <frms>. Each packet is stored in a distinct datagram written to <buf>. - * - * Each datagram is prepended by a two fields header : the datagram length and - * the address of the packet contained in the datagram. - * - * Returns the number of bytes prepared in packets if succeeded (may be 0), or - * -1 if something wrong happened. 
- */ -static int qc_prep_app_pkts(struct quic_conn *qc, struct buffer *buf, - struct list *frms) -{ - int ret = -1, cc; - struct quic_enc_level *qel; - unsigned char *end, *pos; - struct quic_tx_packet *pkt; - size_t total; - - TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); - - qel = qc->ael; - total = 0; - pos = (unsigned char *)b_tail(buf); - cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE; - /* Each datagram is prepended with its length followed by the address - * of the first packet in the datagram (QUIC_DGRAM_HEADLEN). - */ - while ((!cc && b_contig_space(buf) >= (int)qc->path->mtu + QUIC_DGRAM_HEADLEN) || - (cc && b_contig_space(buf) >= QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN)) { - int err, probe, must_ack; - - TRACE_PROTO("TX prep app pkts", QUIC_EV_CONN_PHPKTS, qc, qel, frms); - probe = 0; - /* We do not probe if an immediate close was asked */ - if (!cc) - probe = qel->pktns->tx.pto_probe; - - if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) - break; - - /* Leave room for the datagram header */ - pos += QUIC_DGRAM_HEADLEN; - if (cc) { - end = pos + QUIC_MIN_CC_PKTSIZE; - } - else if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) { - end = pos + QUIC_MIN(qc->path->mtu, quic_may_send_bytes(qc)); - } - else { - end = pos + qc->path->mtu; - } - - pkt = qc_build_pkt(&pos, end, qel, &qel->tls_ctx, frms, qc, NULL, 0, - QUIC_PACKET_TYPE_SHORT, must_ack, 0, probe, cc, &err); - switch (err) { - case -3: - qc_purge_txbuf(qc, buf); - goto leave; - case -2: - // trace already emitted by function above - goto leave; - case -1: - /* As we provide qc_build_pkt() with an enough big buffer to fulfill an - * MTU, we are here because of the congestion control window. There is - * no need to try to reuse this buffer. - */ - TRACE_PROTO("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc, qel); - goto out; - default: - break; - } - - /* This is to please to GCC. 
We cannot have (err >= 0 && !pkt) */ - BUG_ON(!pkt); - - if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA) - pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA; - - total += pkt->len; - - /* Write datagram header. */ - qc_txb_store(buf, pkt->len, pkt); - /* Build only one datagram when an immediate close is required. */ - if (cc) - break; - } - - out: - if (total && cc) { - BUG_ON(buf != &qc->tx.cc_buf); - qc->tx.cc_dgram_len = total; - } - ret = total; - leave: - TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc); - return ret; -} - /* Free all frames in <l> list. In addition also remove all these frames * from the original ones if they are the results of duplications. */ @@ -362,7 +264,7 @@ static void qc_purge_tx_buf(struct quic_conn *qc, struct buffer *buf) * Remaining data are purged from the buffer and will eventually be detected * as lost which gives the opportunity to retry sending. */ -int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) +static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) { int ret = 0; struct quic_conn *qc; @@ -427,6 +329,7 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) time_sent = now_ms; for (pkt = first_pkt; pkt; pkt = next_pkt) { + struct quic_cc *cc = &qc->path->cc; /* RFC 9000 14.1 Initial datagram size * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting * Initial packets to at least the smallest allowed maximum datagram size of @@ -466,6 +369,8 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) } qc->path->in_flight += pkt->in_flight_len; pkt->pktns->tx.in_flight += pkt->in_flight_len; + if ((global.tune.options & GTUNE_QUIC_CC_HYSTART) && pkt->pktns == qc->apktns) + cc->algo->hystart_start_round(cc, pkt->pn_node.key); if (pkt->in_flight_len) qc_set_timer(qc); TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt); @@ -510,94 +415,14 @@ int qc_purge_txbuf(struct quic_conn *qc, struct buffer *buf) return 1; } -/* Try to send application frames from list 
<frms> on connection <qc>. - * - * Use qc_send_app_probing wrapper when probing with old data. - * - * Returns 1 on success. Some data might not have been sent due to congestion, - * in this case they are left in <frms> input list. The caller may subscribe on - * quic-conn to retry later. - * - * Returns 0 on critical error. - * TODO review and classify more distinctly transient from definitive errors to - * allow callers to properly handle it. - */ -int qc_send_app_pkts(struct quic_conn *qc, struct list *frms) -{ - int status = 0, ret; - struct buffer *buf; - - TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); - - buf = qc_get_txb(qc); - if (!buf) { - TRACE_ERROR("could not get a buffer", QUIC_EV_CONN_TXPKT, qc); - goto err; - } - - if (b_data(buf) && !qc_purge_txbuf(qc, buf)) - goto err; - - /* Prepare and send packets until we could not further prepare packets. */ - do { - /* Currently buf cannot be non-empty at this stage. Even if a - * previous sendto() has failed it is emptied to simulate - * packet emission and rely on QUIC lost detection to try to - * emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); - - ret = qc_prep_app_pkts(qc, buf, frms); - - if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); - goto err; - } - } while (ret > 0); - - qc_txb_release(qc); - if (ret < 0) - goto err; - - status = 1; - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); - return status; - - err: - TRACE_DEVEL("leaving in error", QUIC_EV_CONN_TXPKT, qc); - return 0; -} - -/* Try to send application frames from list <frms> on connection <qc>. Use this - * function when probing is required. - * - * Returns the result from qc_send_app_pkts function. 
- */ -static forceinline int qc_send_app_probing(struct quic_conn *qc, - struct list *frms) -{ - int ret; - - TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); - - TRACE_PROTO("preparing old data (probing)", QUIC_EV_CONN_FRMLIST, qc, frms); - qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA; - ret = qc_send_app_pkts(qc, frms); - qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; - - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); - return ret; -} - /* Try to send application frames from list <frms> on connection <qc>. This * function is provided for MUX upper layer usage only. * - * Returns the result from qc_send_app_pkts function. + * Returns the result from qc_send() function. */ int qc_send_mux(struct quic_conn *qc, struct list *frms) { + struct list send_list = LIST_HEAD_INIT(send_list); int ret; TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); @@ -613,56 +438,27 @@ int qc_send_mux(struct quic_conn *qc, struct list *frms) if ((qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) && qc->state >= QUIC_HS_ST_COMPLETE) { quic_build_post_handshake_frames(qc); - qc_send_app_pkts(qc, &qc->ael->pktns->tx.frms); + qel_register_send(&send_list, qc->ael, &qc->ael->pktns->tx.frms); + qc_send(qc, 0, &send_list); } TRACE_STATE("preparing data (from MUX)", QUIC_EV_CONN_TXPKT, qc); qc->flags |= QUIC_FL_CONN_TX_MUX_CONTEXT; - ret = qc_send_app_pkts(qc, frms); + qel_register_send(&send_list, qc->ael, frms); + ret = qc_send(qc, 0, &send_list); qc->flags &= ~QUIC_FL_CONN_TX_MUX_CONTEXT; TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); return ret; } -/* Return the encryption level following the one which contains <el> list head - * depending on <retrans> TX mode (retranmission or not). - */ -static inline struct quic_enc_level *qc_list_next_qel(struct list *el, int retrans) -{ - return !retrans ? LIST_NEXT(el, struct quic_enc_level *, list) : - LIST_NEXT(el, struct quic_enc_level *, retrans); -} - -/* Return the encryption level following <qel> depending on <retrans> TX mode - * (retranmission or not). 
+/* Select <*tls_ctx> and <*ver> for the encryption level <qel> of <qc> QUIC + * connection, depending on its state, especially the negotiated version. */ -static inline struct quic_enc_level *qc_next_qel(struct quic_enc_level *qel, int retrans) -{ - struct list *el = !retrans ? &qel->list : &qel->retrans; - - return qc_list_next_qel(el, retrans); -} - -/* Return 1 if <qel> is at the head of its list, 0 if not. */ -static inline int qc_qel_is_head(struct quic_enc_level *qel, struct list *l, - int retrans) -{ - return !retrans ? &qel->list == l : &qel->retrans == l; -} - -/* Select <*tls_ctx>, <*frms> and <*ver> for the encryption level <qel> of <qc> QUIC - * connection, depending on its state, especially the negotiated version and if - * retransmissions are required. If this the case <qels> is the list of encryption - * levels to used, or NULL if no retransmissions are required. - * Never fails. - */ -static inline void qc_select_tls_frms_ver(struct quic_conn *qc, - struct quic_enc_level *qel, - struct quic_tls_ctx **tls_ctx, - struct list **frms, - const struct quic_version **ver, - struct list *qels) +static inline void qc_select_tls_ver(struct quic_conn *qc, + struct quic_enc_level *qel, + struct quic_tls_ctx **tls_ctx, + const struct quic_version **ver) { if (qc->negotiated_version) { *ver = qc->negotiated_version; @@ -675,18 +471,11 @@ static inline void qc_select_tls_frms_ver(struct quic_conn *qc, *ver = qc->original_version; *tls_ctx = &qel->tls_ctx; } - - if (!qels) - *frms = &qel->pktns->tx.frms; - else - *frms = qel->retrans_frms; } /* Prepare as much as possible QUIC datagrams/packets for sending from <qels> * list of encryption levels. Several packets can be coalesced into a single - * datagram. The result is written into <buf>. Note that if <qels> is NULL, - * the encryption levels which will be used are those currently allocated - * and attached to the connection. + * datagram. The result is written into <buf>. 
* * Each datagram is prepended by a two fields header : the datagram length and * the address of first packet in the datagram. @@ -694,15 +483,15 @@ static inline void qc_select_tls_frms_ver(struct quic_conn *qc, * Returns the number of bytes prepared in datragrams/packets if succeeded * (may be 0), or -1 if something wrong happened. */ -int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) +static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, + struct list *qels) { - int ret, cc, retrans, padding; + int ret, cc, padding; struct quic_tx_packet *first_pkt, *prv_pkt; unsigned char *end, *pos; uint16_t dglen; size_t total; - struct list *qel_list; - struct quic_enc_level *qel; + struct quic_enc_level *qel, *tmp_qel; TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc); /* Currently qc_prep_pkts() does not handle buffer wrapping so the @@ -712,32 +501,34 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) ret = -1; cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE; - retrans = !!qels; padding = 0; first_pkt = prv_pkt = NULL; end = pos = (unsigned char *)b_head(buf); dglen = 0; total = 0; - qel_list = qels ? qels : &qc->qel_list; - qel = qc_list_next_qel(qel_list, retrans); - while (!qc_qel_is_head(qel, qel_list, retrans)) { + list_for_each_entry_safe(qel, tmp_qel, qels, el_send) { struct quic_tls_ctx *tls_ctx; const struct quic_version *ver; - struct list *frms, *next_frms; + struct list *frms = qel->send_frms, *next_frms; struct quic_enc_level *next_qel; if (qel == qc->eel) { /* Next encryption level */ - qel = qc_next_qel(qel, retrans); continue; } - qc_select_tls_frms_ver(qc, qel, &tls_ctx, &frms, &ver, qels); + qc_select_tls_ver(qc, qel, &tls_ctx, &ver); - next_qel = qc_next_qel(qel, retrans); - next_frms = qc_qel_is_head(next_qel, qel_list, retrans) ? NULL : - !qels ? &next_qel->pktns->tx.frms : next_qel->retrans_frms; + /* Retrieve next QEL. Set it to NULL if on qels last element. 
*/ + if (qel->el_send.n != qels) { + next_qel = LIST_ELEM(qel->el_send.n, struct quic_enc_level *, el_send); + next_frms = next_qel->send_frms; + } + else { + next_qel = NULL; + next_frms = NULL; + } /* Build as much as datagrams at <qel> encryption level. * Each datagram is prepended with its length followed by the address @@ -756,7 +547,11 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) probe = qel->pktns->tx.pto_probe; if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) { - if (prv_pkt && qc_qel_is_head(next_qel, qel_list, retrans)) { + /* Remove qel from send_list if nothing to send. */ + LIST_DEL_INIT(&qel->el_send); + qel->send_frms = NULL; + + if (prv_pkt && !next_qel) { qc_txb_store(buf, dglen, first_pkt); /* Build only one datagram when an immediate close is required. */ if (cc) @@ -852,15 +647,13 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) * the same datagram, except if <qel> is the Application data * encryption level which cannot be selected to do that. */ - if (LIST_ISEMPTY(frms) && qel != qc->ael && - !qc_qel_is_head(next_qel, qel_list, retrans)) { + if (LIST_ISEMPTY(frms) && qel != qc->ael && next_qel) { if (qel == qc->iel && (!qc_is_listener(qc) || cur_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) padding = 1; prv_pkt = cur_pkt; - break; } else { qc_txb_store(buf, dglen, first_pkt); @@ -873,9 +666,6 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) prv_pkt = NULL; } } - - /* Next encryption level */ - qel = next_qel; } out: @@ -891,24 +681,25 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) return ret; } -/* Sends handshake packets from up to two encryption levels <tel> and <next_te> - * with <tel_frms> and <next_tel_frms> as frame list respectively for <qc> - * QUIC connection. <old_data> is used as boolean to send data already sent but - * not already acknowledged (in flight). 
- * Returns 1 if succeeded, 0 if not. +/* Encode frames and send them as packets for <qc> connection. Input frames are + * specified via quic_enc_level <send_list> through their send_frms member. Set + * <old_data> when reemitted duplicated data. + * +* Returns 1 on success else 0. Note that <send_list> will always be reset +* after qc_send() exit. */ -int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data, - struct quic_enc_level *qel1, struct quic_enc_level *qel2) +int qc_send(struct quic_conn *qc, int old_data, struct list *send_list) { + struct quic_enc_level *qel, *tmp_qel; int ret, status = 0; - struct buffer *buf = qc_get_txb(qc); - struct list qels = LIST_HEAD_INIT(qels); + struct buffer *buf; TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); + buf = qc_get_txb(qc); if (!buf) { TRACE_ERROR("buffer allocation failed", QUIC_EV_CONN_TXPKT, qc); - goto leave; + goto out; } if (b_data(buf) && !qc_purge_txbuf(qc, buf)) { @@ -916,63 +707,75 @@ int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data, goto out; } - /* Currently buf cannot be non-empty at this stage. Even if a previous - * sendto() has failed it is emptied to simulate packet emission and - * rely on QUIC lost detection to try to emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); - if (old_data) { TRACE_STATE("old data for probing asked", QUIC_EV_CONN_TXPKT, qc); qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA; } - if (qel1) { - BUG_ON(LIST_INLIST(&qel1->retrans)); - LIST_APPEND(&qels, &qel1->retrans); - } + /* Prepare and send packets until we could not further prepare packets. */ + do { + /* Buffer must always be empty before qc_prep_pkts() usage. + * qc_send_ppkts() ensures it is cleared on success. 
+ */ + BUG_ON_HOT(b_data(buf)); + b_reset(buf); - if (qel2) { - BUG_ON(LIST_INLIST(&qel2->retrans)); - LIST_APPEND(&qels, &qel2->retrans); - } + ret = qc_prep_pkts(qc, buf, send_list); - ret = qc_prep_hpkts(qc, buf, &qels); - if (ret == -1) { - qc_txb_release(qc); - TRACE_ERROR("Could not build some packets", QUIC_EV_CONN_TXPKT, qc); - goto out; - } + if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) { + if (qc->flags & QUIC_FL_CONN_TO_KILL) + qc_txb_release(qc); + goto out; + } + } while (ret > 0 && !LIST_ISEMPTY(send_list)); - if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); - TRACE_ERROR("Could not send some packets", QUIC_EV_CONN_TXPKT, qc); + qc_txb_release(qc); + if (ret < 0) goto out; - } - qc_txb_release(qc); status = 1; out: - if (qel1) { - LIST_DEL_INIT(&qel1->retrans); - qel1->retrans_frms = NULL; + if (old_data) { + TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc); + qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; } - if (qel2) { - LIST_DEL_INIT(&qel2->retrans); - qel2->retrans_frms = NULL; + /* Always reset QEL sending list. */ + list_for_each_entry_safe(qel, tmp_qel, send_list, el_send) { + LIST_DEL_INIT(&qel->el_send); + qel->send_frms = NULL; } - TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc); - qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; - leave: - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); + TRACE_DEVEL((status ? "leaving" : "leaving in error"), QUIC_EV_CONN_TXPKT, qc); return status; } +/* Insert <qel> into <send_list> in preparation for sending. Set its send + * frames list pointer to <frms>. + */ +void qel_register_send(struct list *send_list, struct quic_enc_level *qel, + struct list *frms) +{ + /* Ensure QEL is not already registered for sending. */ + BUG_ON(LIST_INLIST(&qel->el_send)); + + LIST_APPEND(send_list, &qel->el_send); + qel->send_frms = frms; +} + +/* Returns true if <qel> should be registered for sending. 
This is the case if + * frames are prepared, probing is set, <qc> ACK timer has fired or a + * CONNECTION_CLOSE is required. + */ +int qel_need_sending(struct quic_enc_level *qel, struct quic_conn *qc) +{ + return !LIST_ISEMPTY(&qel->pktns->tx.frms) || + qel->pktns->tx.pto_probe || + (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) || + (qc->flags & (QUIC_FL_CONN_ACK_TIMER_FIRED|QUIC_FL_CONN_IMMEDIATE_CLOSE)); +} + /* Retransmit up to two datagrams depending on packet number space. * Return 0 when failed, 0 if not. */ @@ -993,9 +796,9 @@ int qc_dgrams_retransmit(struct quic_conn *qc) int i; for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list ifrms = LIST_HEAD_INIT(ifrms); struct list hfrms = LIST_HEAD_INIT(hfrms); - struct list qels = LIST_HEAD_INIT(qels); qc_prep_hdshk_fast_retrans(qc, &ifrms, &hfrms); TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &ifrms); @@ -1004,24 +807,25 @@ int qc_dgrams_retransmit(struct quic_conn *qc) ipktns->tx.pto_probe = 1; if (!LIST_ISEMPTY(&hfrms)) hpktns->tx.pto_probe = 1; - qc->iel->retrans_frms = &ifrms; + + qel_register_send(&send_list, qc->iel, &ifrms); if (qc->hel) - qc->hel->retrans_frms = &hfrms; - sret = qc_send_hdshk_pkts(qc, 1, qc->iel, qc->hel); + qel_register_send(&send_list, qc->hel, &hfrms); + + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &ifrms); qc_free_frm_list(qc, &hfrms); if (!sret) goto leave; } else { - /* We are in the case where the anti-amplification limit will be - * reached after having sent this datagram or some handshake frames - * could not be allocated. There is no need to send more than one - * datagram. + /* No frame to send due to amplification limit + * or allocation failure. A PING frame will be + * emitted for probing. 
*/ ipktns->tx.pto_probe = 1; - qc->iel->retrans_frms = &ifrms; - sret = qc_send_hdshk_pkts(qc, 0, qc->iel, NULL); + qel_register_send(&send_list, qc->iel, &ifrms); + sret = qc_send(qc, 0, &send_list); qc_free_frm_list(qc, &ifrms); qc_free_frm_list(qc, &hfrms); if (!sret) @@ -1042,14 +846,15 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (hpktns && (hpktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) { hpktns->tx.pto_probe = 0; for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list frms1 = LIST_HEAD_INIT(frms1); qc_prep_fast_retrans(qc, hpktns, &frms1, NULL); TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1); if (!LIST_ISEMPTY(&frms1)) { hpktns->tx.pto_probe = 1; - qc->hel->retrans_frms = &frms1; - sret = qc_send_hdshk_pkts(qc, 1, qc->hel, NULL); + qel_register_send(&send_list, qc->hel, &frms1); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms1); if (!sret) goto leave; @@ -1060,6 +865,7 @@ int qc_dgrams_retransmit(struct quic_conn *qc) hpktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED; } else if (apktns && (apktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list frms2 = LIST_HEAD_INIT(frms2); struct list frms1 = LIST_HEAD_INIT(frms1); @@ -1070,7 +876,8 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (!LIST_ISEMPTY(&frms1)) { apktns->tx.pto_probe = 1; - sret = qc_send_app_probing(qc, &frms1); + qel_register_send(&send_list, qc->ael, &frms1); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms1); if (!sret) { qc_free_frm_list(qc, &frms2); @@ -1080,7 +887,8 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (!LIST_ISEMPTY(&frms2)) { apktns->tx.pto_probe = 1; - sret = qc_send_app_probing(qc, &frms2); + qel_register_send(&send_list, qc->ael, &frms2); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms2); if (!sret) goto leave; @@ -1173,24 +981,38 @@ int send_stateless_reset(struct listener *l, 
struct sockaddr_storage *dstaddr, TRACE_ENTER(QUIC_EV_STATELESS_RST); + /* RFC 9000 10.3. Stateless Reset + * + * Endpoints MUST discard packets that are too small to be valid QUIC + * packets. To give an example, with the set of AEAD functions defined + * in [QUIC-TLS], short header packets that are smaller than 21 bytes + * are never valid. + * + * [...] + * + * RFC 9000 10.3.3. Looping + * + * An endpoint MUST ensure that every Stateless Reset that it sends is + * smaller than the packet that triggered it, unless it maintains state + * sufficient to prevent looping. In the event of a loop, this results + * in packets eventually being too small to trigger a response. + */ + if (rxpkt->len <= QUIC_STATELESS_RESET_PACKET_MINLEN) { + TRACE_DEVEL("rxpkt too short", QUIC_EV_STATELESS_RST); + goto leave; + } + prx = l->bind_conf->frontend; prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module); - /* 10.3 Stateless Reset (https://www.rfc-editor.org/rfc/rfc9000.html#section-10.3) - * The resulting minimum size of 21 bytes does not guarantee that a Stateless - * Reset is difficult to distinguish from other packets if the recipient requires - * the use of a connection ID. To achieve that end, the endpoint SHOULD ensure - * that all packets it sends are at least 22 bytes longer than the minimum - * connection ID length that it requests the peer to include in its packets, - * adding PADDING frames as necessary. This ensures that any Stateless Reset - * sent by the peer is indistinguishable from a valid packet sent to the endpoint. + + /* RFC 9000 10.3. Stateless Reset + * * An endpoint that sends a Stateless Reset in response to a packet that is * 43 bytes or shorter SHOULD send a Stateless Reset that is one byte shorter * than the packet it responds to. */ - - /* Note that we build at most a 42 bytes QUIC packet to mimic a short packet */ - pktlen = rxpkt->len <= 43 ? 
rxpkt->len - 1 : 0; - pktlen = QUIC_MAX(QUIC_STATELESS_RESET_PACKET_MINLEN, pktlen); + pktlen = rxpkt->len <= 43 ? rxpkt->len - 1 : + QUIC_STATELESS_RESET_PACKET_MINLEN; rndlen = pktlen - QUIC_STATELESS_RESET_TOKEN_LEN; /* Put a header of random bytes */ @@ -1320,7 +1142,7 @@ static inline int quic_write_uint32(unsigned char **buf, if (end - *buf < sizeof val) return 0; - *(uint32_t *)*buf = htonl(val); + write_u32(*buf, htonl(val)); *buf += sizeof val; return 1; diff --git a/src/resolvers.c b/src/resolvers.c index 3275cd2..47b0cce 100644 --- a/src/resolvers.c +++ b/src/resolvers.c @@ -28,6 +28,7 @@ #include <haproxy/check.h> #include <haproxy/cli.h> #include <haproxy/dns.h> +#include <haproxy/dns_ring.h> #include <haproxy/errors.h> #include <haproxy/fd.h> #include <haproxy/http_rules.h> @@ -36,7 +37,6 @@ #include <haproxy/protocol.h> #include <haproxy/proxy.h> #include <haproxy/resolvers.h> -#include <haproxy/ring.h> #include <haproxy/sample.h> #include <haproxy/sc_strm.h> #include <haproxy/server.h> @@ -50,6 +50,10 @@ #include <haproxy/vars.h> #include <haproxy/xxhash.h> +#if defined(USE_PROMEX) +#include <promex/promex.h> +#endif + struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers); struct list resolv_srvrq_list = LIST_HEAD_INIT(resolv_srvrq_list); @@ -92,7 +96,7 @@ enum { RSLV_STAT_END, }; -static struct name_desc resolv_stats[] = { +static struct stat_col resolv_stats[] = { [RSLV_STAT_ID] = { .name = "id", .desc = "ID" }, [RSLV_STAT_PID] = { .name = "pid", .desc = "Parent ID" }, [RSLV_STAT_SENT] = { .name = "sent", .desc = "Sent" }, @@ -114,26 +118,79 @@ static struct name_desc resolv_stats[] = { static struct dns_counters dns_counters; -static void resolv_fill_stats(void *d, struct field *stats) +static int resolv_fill_stats(void *d, struct field *stats, unsigned int *selected_field) { struct dns_counters *counters = d; - stats[RSLV_STAT_ID] = mkf_str(FO_CONFIG, counters->id); - stats[RSLV_STAT_PID] = mkf_str(FO_CONFIG, counters->pid); - 
stats[RSLV_STAT_SENT] = mkf_u64(FN_GAUGE, counters->sent); - stats[RSLV_STAT_SND_ERROR] = mkf_u64(FN_GAUGE, counters->snd_error); - stats[RSLV_STAT_VALID] = mkf_u64(FN_GAUGE, counters->app.resolver.valid); - stats[RSLV_STAT_UPDATE] = mkf_u64(FN_GAUGE, counters->app.resolver.update); - stats[RSLV_STAT_CNAME] = mkf_u64(FN_GAUGE, counters->app.resolver.cname); - stats[RSLV_STAT_CNAME_ERROR] = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error); - stats[RSLV_STAT_ANY_ERR] = mkf_u64(FN_GAUGE, counters->app.resolver.any_err); - stats[RSLV_STAT_NX] = mkf_u64(FN_GAUGE, counters->app.resolver.nx); - stats[RSLV_STAT_TIMEOUT] = mkf_u64(FN_GAUGE, counters->app.resolver.timeout); - stats[RSLV_STAT_REFUSED] = mkf_u64(FN_GAUGE, counters->app.resolver.refused); - stats[RSLV_STAT_OTHER] = mkf_u64(FN_GAUGE, counters->app.resolver.other); - stats[RSLV_STAT_INVALID] = mkf_u64(FN_GAUGE, counters->app.resolver.invalid); - stats[RSLV_STAT_TOO_BIG] = mkf_u64(FN_GAUGE, counters->app.resolver.too_big); - stats[RSLV_STAT_TRUNCATED] = mkf_u64(FN_GAUGE, counters->app.resolver.truncated); - stats[RSLV_STAT_OUTDATED] = mkf_u64(FN_GAUGE, counters->app.resolver.outdated); + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); + + for (; current_field < RSLV_STAT_END; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case RSLV_STAT_ID: + metric = mkf_str(FO_CONFIG, counters->id); + break; + case RSLV_STAT_PID: + metric = mkf_str(FO_CONFIG, counters->pid); + break; + case RSLV_STAT_SENT: + metric = mkf_u64(FN_GAUGE, counters->sent); + break; + case RSLV_STAT_SND_ERROR: + metric = mkf_u64(FN_GAUGE, counters->snd_error); + break; + case RSLV_STAT_VALID: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.valid); + break; + case RSLV_STAT_UPDATE: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.update); + break; + case RSLV_STAT_CNAME: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.cname); + break; + case RSLV_STAT_CNAME_ERROR: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error); + break; + case RSLV_STAT_ANY_ERR: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.any_err); + break; + case RSLV_STAT_NX: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.nx); + break; + case RSLV_STAT_TIMEOUT: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.timeout); + break; + case RSLV_STAT_REFUSED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.refused); + break; + case RSLV_STAT_OTHER: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.other); + break; + case RSLV_STAT_INVALID: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.invalid); + break; + case RSLV_STAT_TOO_BIG: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.too_big); + break; + case RSLV_STAT_TRUNCATED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.truncated); + break; + case RSLV_STAT_OUTDATED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.outdated); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module rslv_stats_module = { @@ -170,6 +227,20 @@ struct resolvers *find_resolvers_by_id(const char *id) return NULL; } +/* Returns a pointer to the nameserver matching numerical <id> within <parent> + * resolver section. NULL is returned if no match is found. + */ +struct dns_nameserver *find_nameserver_by_resolvers_and_id(struct resolvers *parent, unsigned int id) +{ + struct dns_nameserver *ns; + + list_for_each_entry(ns, &parent->nameservers, list) { + if (ns->puid == id) + return ns; + } + return NULL; +} + /* Returns a pointer on the SRV request matching the name <name> for the proxy * <px>. NULL is returned if no match is found. */ @@ -645,14 +716,17 @@ static void leave_resolver_code() */ static void resolv_srvrq_cleanup_srv(struct server *srv) { + struct server_inetaddr srv_addr; + _resolv_unlink_resolution(srv->resolv_requester); HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); - srvrq_update_srv_status(srv, 1); + srvrq_set_srv_down(srv); ha_free(&srv->hostname); ha_free(&srv->hostname_dn); srv->hostname_dn_len = 0; - memset(&srv->addr, 0, sizeof(srv->addr)); - srv->svc_port = 0; + memset(&srv_addr, 0, sizeof(srv_addr)); + /* unset server's addr AND port */ + server_set_inetaddr(srv, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); srv->flags |= SRV_F_NO_RESOLUTION; ebpt_delete(&srv->host_dn); @@ -815,12 +889,16 @@ static void resolv_check_response(struct resolv_resolution *res) srv_found: /* And update this server, if found (srv is locked here) */ if (srv) { + struct server_inetaddr srv_addr; + uint8_t ip_change = 0; + /* re-enable DNS resolution for this server by default */ srv->flags &= ~SRV_F_NO_RESOLUTION; srv->srvrq_check->expire = TICK_ETERNITY; - srv->svc_port = item->port; - srv->flags &= ~SRV_F_MAPPORTS; + server_get_inetaddr(srv, &srv_addr); + srv_addr.port.svc = item->port; + 
srv_addr.port.map = 0; /* Check if an Additional Record is associated to this SRV record. * Perform some sanity checks too to ensure the record can be used. @@ -833,10 +911,12 @@ srv_found: switch (item->ar_item->type) { case DNS_RTYPE_A: - srv_update_addr(srv, &item->ar_item->data.in4.sin_addr, AF_INET, "DNS additional record"); + srv_addr.family = AF_INET; + srv_addr.addr.v4 = item->ar_item->data.in4.sin_addr; break; case DNS_RTYPE_AAAA: - srv_update_addr(srv, &item->ar_item->data.in6.sin6_addr, AF_INET6, "DNS additional record"); + srv_addr.family = AF_INET6; + srv_addr.addr.v6 = item->ar_item->data.in6.sin6_addr; break; } @@ -846,8 +926,15 @@ srv_found: * It is usless to perform an extra resolution */ _resolv_unlink_resolution(srv->resolv_requester); + + ip_change = 1; } + if (ip_change) + server_set_inetaddr_warn(srv, &srv_addr, SERVER_INETADDR_UPDATER_DNS_AR); + else + server_set_inetaddr(srv, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + if (!srv->hostname_dn) { const char *msg = NULL; char hostname[DNS_MAX_NAME_SIZE+1]; @@ -873,9 +960,6 @@ srv_found: resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1); } - /* Update the server status */ - srvrq_update_srv_status(srv, (srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6)); - if (!srv->resolv_opts.ignore_weight) { char weight[9]; int ha_weight; @@ -2487,11 +2571,11 @@ static void resolvers_destroy(struct resolvers *resolvers) fd_delete(ns->dgram->conn.t.sock.fd); close(ns->dgram->conn.t.sock.fd); } - ring_free(ns->dgram->ring_req); + dns_ring_free(ns->dgram->ring_req); free(ns->dgram); } if (ns->stream) { - ring_free(ns->stream->ring_req); + dns_ring_free(ns->stream->ring_req); task_destroy(ns->stream->task_req); task_destroy(ns->stream->task_rsp); free(ns->stream); @@ -2684,14 +2768,15 @@ static int stats_dump_resolv_to_buffer(struct stconn *sc, list_for_each_entry(mod, stat_modules, list) { struct counters_node *counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod); - 
mod->fill_stats(counters, stats + idx); + if (!mod->fill_stats(counters, stats + idx, NULL)) + continue; idx += mod->stats_count; } if (!stats_dump_one_line(stats, idx, appctx)) return 0; - if (!stats_putchk(appctx, NULL)) + if (!stats_putchk(appctx, NULL, NULL)) goto full; return 1; @@ -2797,6 +2882,7 @@ int resolv_allocate_counters(struct list *stat_modules) if (strcmp(mod->name, "resolvers") == 0) { ns->counters = (struct dns_counters *)ns->extra_counters->data + mod->counters_off[COUNTERS_RSLV]; ns->counters->id = ns->id; + ns->counters->ns_puid = ns->puid; ns->counters->pid = resolvers->id; } } @@ -3238,7 +3324,7 @@ int check_action_do_resolve(struct act_rule *rule, struct proxy *px, char **err) void resolvers_setup_proxy(struct proxy *px) { - px->last_change = ns_to_sec(now_ns); + px->fe_counters.last_change = px->be_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_FE | PR_CAP_BE; px->maxconn = 0; px->conn_retries = 1; @@ -3371,7 +3457,9 @@ static int parse_resolve_conf(char **errmsg, char **warnmsg) newnameserver->parent = curr_resolvers; newnameserver->process_responses = resolv_process_responses; newnameserver->conf.line = resolv_linenum; + newnameserver->puid = curr_resolvers->nb_nameservers; LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list); + curr_resolvers->nb_nameservers++; } resolv_out: @@ -3428,6 +3516,7 @@ static int resolvers_new(struct resolvers **resolvers, const char *id, const cha r->timeout.resolve = 1000; r->timeout.retry = 1000; r->resolve_retries = 3; + r->nb_nameservers = 0; LIST_INIT(&r->nameservers); LIST_INIT(&r->resolutions.curr); LIST_INIT(&r->resolutions.wait); @@ -3572,8 +3661,10 @@ int cfg_parse_resolvers(const char *file, int linenum, char **args, int kwm) newnameserver->parent = curr_resolvers; newnameserver->process_responses = resolv_process_responses; newnameserver->conf.line = linenum; + newnameserver->puid = curr_resolvers->nb_nameservers; /* the nameservers are linked backward first */ 
LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list); + curr_resolvers->nb_nameservers++; } else if (strcmp(args[0], "parse-resolv-conf") == 0) { err_code |= parse_resolve_conf(&errmsg, &warnmsg); @@ -3744,14 +3835,14 @@ out: */ int resolvers_create_default() { - int err_code = 0; + int err_code = ERR_NONE; if (global.mode & MODE_MWORKER_WAIT) /* does not create the section if in wait mode */ - return 0; + return ERR_NONE; /* if the section already exists, do nothing */ if (find_resolvers_by_id("default")) - return 0; + return ERR_NONE; curr_resolvers = NULL; err_code |= resolvers_new(&curr_resolvers, "default", "<internal>", 0); @@ -3777,7 +3868,7 @@ err: /* we never return an error there, we only try to create this section * if that's possible */ - return 0; + return ERR_NONE; } int cfg_post_parse_resolvers() @@ -3811,3 +3902,70 @@ REGISTER_CONFIG_SECTION("resolvers", cfg_parse_resolvers, cfg_post_parse_re REGISTER_POST_DEINIT(resolvers_deinit); REGISTER_CONFIG_POSTPARSER("dns runtime resolver", resolvers_finalize_config); REGISTER_PRE_CHECK(resolvers_create_default); + +#if defined(USE_PROMEX) + +static int rslv_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc) +{ + if (id >= RSLV_STAT_END) + return -1; + if (id == RSLV_STAT_ID || id == RSLV_STAT_PID) + return 0; + + *metric = (struct promex_metric){ .n = ist(resolv_stats[id].name), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist(resolv_stats[id].desc); + return 1; +} + +static void *rslv_promex_start_ts(void *unused, unsigned int id) +{ + struct resolvers *resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list); + + return LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list); +} + +static void *rslv_promex_next_ts(void *unused, void *metric_ctx, unsigned int id) +{ + struct dns_nameserver *ns = metric_ctx; + struct resolvers *resolver = ns->parent; + + ns = LIST_NEXT(&ns->list, struct dns_nameserver *, list); + if 
(&ns->list == &resolver->nameservers) { + resolver = LIST_NEXT(&resolver->list, struct resolvers *, list); + ns = ((&resolver->list == &sec_resolvers) + ? NULL + : LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list)); + } + return ns; +} + +static int rslv_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field) +{ + struct dns_nameserver *ns = metric_ctx; + struct resolvers *resolver = ns->parent; + struct field stats[RSLV_STAT_END]; + int ret; + + labels[0].name = ist("resolver"); + labels[0].value = ist(resolver->id); + labels[1].name = ist("nameserver"); + labels[1].value = ist(ns->id); + + ret = resolv_fill_stats(ns->counters, stats, &id); + if (ret == 1) + *field = stats[id]; + return ret; +} + +static struct promex_module promex_resolver_module = { + .name = IST("resolver"), + .metric_info = rslv_promex_metric_info, + .start_ts = rslv_promex_start_ts, + .next_ts = rslv_promex_next_ts, + .fill_ts = rslv_promex_fill_ts, + .nb_metrics = RSLV_STAT_END, +}; + +INITCALL1(STG_REGISTER, promex_register_module, &promex_resolver_module); + +#endif @@ -22,11 +22,13 @@ #include <haproxy/api.h> #include <haproxy/applet.h> #include <haproxy/buf.h> +#include <haproxy/cfgparse.h> #include <haproxy/cli.h> #include <haproxy/ring.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/thread.h> +#include <haproxy/vecpair.h> /* context used to dump the contents of a ring via "show events" or "show errors" */ struct show_ring_ctx { @@ -35,117 +37,120 @@ struct show_ring_ctx { uint flags; /* set of RING_WF_* */ }; -/* Initialize a pre-allocated ring with the buffer area - * of size */ -void ring_init(struct ring *ring, void *area, size_t size) +/* Initialize a pre-allocated ring with the buffer area of size <size>. + * Makes the storage point to the indicated area and adjusts the declared + * ring size according to the position of the area in the storage. 
If <reset> + * is non-zero, the storage area is reset, otherwise it's left intact (except + * for the area origin pointer which is updated so that the area can come from + * an mmap()). + */ +void ring_init(struct ring *ring, void *area, size_t size, int reset) { - HA_RWLOCK_INIT(&ring->lock); - LIST_INIT(&ring->waiters); + MT_LIST_INIT(&ring->waiters); ring->readers_count = 0; - ring->buf = b_make(area, size, 0, 0); - /* write the initial RC byte */ - b_putchr(&ring->buf, 0); + ring->flags = 0; + ring->storage = area; + ring->pending = 0; + ring->waking = 0; + memset(&ring->queue, 0, sizeof(ring->queue)); + + if (reset) { + ring->storage->size = size - sizeof(*ring->storage); + ring->storage->rsvd = sizeof(*ring->storage); + ring->storage->head = 0; + ring->storage->tail = 0; + + /* write the initial RC byte */ + *ring->storage->area = 0; + ring->storage->tail = 1; + } } -/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on - * allocation failure. +/* Creates a ring and its storage area at address <area> for <size> bytes. + * If <area> is null, then it's allocated of the requested size. The ring + * storage struct is part of the area so the usable area is slightly reduced. + * However the storage is immediately adjacent to the struct so that the ring + * remains consistent on-disk. ring_free() will ignore such ring storages and + * will only release the ring part, so the caller is responsible for releasing + * them. If <reset> is non-zero, the storage area is reset, otherwise it's left + * intact. 
*/ -struct ring *ring_new(size_t size) +struct ring *ring_make_from_area(void *area, size_t size, int reset) { struct ring *ring = NULL; - void *area = NULL; + uint flags = 0; - if (size < 2) - goto fail; + if (size < sizeof(*ring->storage) + 2) + return NULL; ring = malloc(sizeof(*ring)); if (!ring) goto fail; - area = malloc(size); + if (!area) + area = malloc(size); + else + flags |= RING_FL_MAPPED; + if (!area) goto fail; - ring_init(ring, area, size); + ring_init(ring, area, size, reset); + ring->flags |= flags; return ring; fail: - free(area); free(ring); return NULL; } -/* Creates a unified ring + storage area at address <area> for <size> bytes. - * If <area> is null, then it's allocated of the requested size. The ring - * struct is part of the area so the usable area is slightly reduced. However - * the ring storage is immediately adjacent to the struct. ring_free() will - * ignore such rings, so the caller is responsible for releasing them. - */ -struct ring *ring_make_from_area(void *area, size_t size) -{ - struct ring *ring = NULL; - - if (size < sizeof(*ring)) - return NULL; - - if (!area) - area = malloc(size); - if (!area) - return NULL; - - ring = area; - area += sizeof(*ring); - ring_init(ring, area, size - sizeof(*ring)); - return ring; -} - -/* Cast an unified ring + storage area to a ring from <area>, without - * reinitializing the data buffer. - * - * Reinitialize the waiters and the lock. +/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on + * allocation failure. The size is the area size, not the usable size. 
*/ -struct ring *ring_cast_from_area(void *area) +struct ring *ring_new(size_t size) { - struct ring *ring = NULL; - - ring = area; - ring->buf.area = area + sizeof(*ring); - - HA_RWLOCK_INIT(&ring->lock); - LIST_INIT(&ring->waiters); - ring->readers_count = 0; - - return ring; + return ring_make_from_area(NULL, size, 1); } /* Resizes existing ring <ring> to <size> which must be larger, without losing * its contents. The new size must be at least as large as the previous one or * no change will be performed. The pointer to the ring is returned on success, - * or NULL on allocation failure. This will lock the ring for writes. + * or NULL on allocation failure. This will lock the ring for writes. The size + * is the allocated area size, and includes the ring_storage header. */ struct ring *ring_resize(struct ring *ring, size_t size) { - void *area; + struct ring_storage *old, *new; - if (b_size(&ring->buf) >= size) + if (size <= ring_data(ring) + sizeof(*ring->storage)) return ring; - area = malloc(size); - if (!area) + old = ring->storage; + new = malloc(size); + if (!new) return NULL; - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + thread_isolate(); - /* recheck the buffer's size, it may have changed during the malloc */ - if (b_size(&ring->buf) < size) { + /* recheck the ring's size, it may have changed during the malloc */ + if (size > ring_data(ring) + sizeof(*ring->storage)) { /* copy old contents */ - b_getblk(&ring->buf, area, ring->buf.data, 0); - area = HA_ATOMIC_XCHG(&ring->buf.area, area); - ring->buf.size = size; + struct ist v1, v2; + size_t len; + + vp_ring_to_data(&v1, &v2, old->area, old->size, old->head, old->tail); + len = vp_size(v1, v2); + vp_peek_ofs(v1, v2, 0, new->area, len); + new->size = size - sizeof(*ring->storage); + new->rsvd = sizeof(*ring->storage); + new->head = 0; + new->tail = len; + new = HA_ATOMIC_XCHG(&ring->storage, new); } - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + thread_release(); - free(area); + /* free the unused one */ 
+ free(new); return ring; } @@ -156,10 +161,8 @@ void ring_free(struct ring *ring) return; /* make sure it was not allocated by ring_make_from_area */ - if (ring->buf.area == (void *)ring + sizeof(*ring)) - return; - - free(ring->buf.area); + if (!(ring->flags & RING_FL_MAPPED)) + free(ring->storage); free(ring); } @@ -173,12 +176,20 @@ void ring_free(struct ring *ring) */ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg) { - struct buffer *buf = &ring->buf; - struct appctx *appctx; - size_t totlen = 0; + struct ring_wait_cell **ring_queue_ptr = DISGUISE(&ring->queue[ti->ring_queue].ptr); + struct ring_wait_cell cell, *next_cell, *curr_cell; + size_t *tail_ptr = &ring->storage->tail; + size_t head_ofs, tail_ofs, new_tail_ofs; + size_t ring_size; + char *ring_area; + struct ist v1, v2; + size_t msglen = 0; size_t lenlen; + size_t needed; uint64_t dellen; int dellenlen; + uint8_t *lock_ptr; + uint8_t readers; ssize_t sent = 0; int i; @@ -191,20 +202,125 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz * copying due to the varint encoding of the length. */ for (i = 0; i < npfx; i++) - totlen += pfx[i].len; + msglen += pfx[i].len; for (i = 0; i < nmsg; i++) - totlen += msg[i].len; + msglen += msg[i].len; - if (totlen > maxlen) - totlen = maxlen; + if (msglen > maxlen) + msglen = maxlen; - lenlen = varint_bytes(totlen); + lenlen = varint_bytes(msglen); - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - if (lenlen + totlen + 1 + 1 > b_size(buf)) - goto done_buf; + /* We need: + * - lenlen bytes for the size encoding + * - msglen for the message + * - one byte for the new marker + * + * Note that we'll also reserve one extra byte to make sure we never + * leave a full buffer (the vec-to-ring conversion cannot be done if + * both areas are of size 0). 
+ */ + needed = lenlen + msglen + 1; - while (b_room(buf) < lenlen + totlen + 1) { + /* these ones do not change under us (only resize affects them and it + * must be done under thread isolation). + */ + ring_area = ring->storage->area; + ring_size = ring->storage->size; + + if (needed + 1 > ring_size) + goto leave; + + cell.to_send_self = needed; + cell.needed_tot = 0; // only when non-zero the cell is considered ready. + cell.maxlen = msglen; + cell.pfx = pfx; + cell.npfx = npfx; + cell.msg = msg; + cell.nmsg = nmsg; + + /* insert our cell into the queue before the previous one. We may have + * to wait a bit if the queue's leader is attempting an election to win + * the tail, hence the busy value (should be rare enough). + */ + next_cell = HA_ATOMIC_XCHG(ring_queue_ptr, &cell); + + /* let's add the cumulated size of pending messages to ours */ + cell.next = next_cell; + if (next_cell) { + size_t next_needed; + + while ((next_needed = HA_ATOMIC_LOAD(&next_cell->needed_tot)) == 0) + __ha_cpu_relax_for_read(); + needed += next_needed; + } + + /* now <needed> will represent the size to store *all* messages. The + * atomic store may unlock a subsequent thread waiting for this one. + */ + HA_ATOMIC_STORE(&cell.needed_tot, needed); + + /* OK now we're the queue leader, it's our job to try to get ownership + * of the tail, if we succeeded above, we don't even enter the loop. If + * we failed, we set ourselves at the top the queue, waiting for the + * tail to be unlocked again. We stop doing that if another thread + * comes in and becomes the leader in turn. + */ + + /* Wait for another thread to take the lead or for the tail to + * be available again. It's critical to be read-only in this + * loop so as not to lose time synchronizing cache lines. Also, + * we must detect a new leader ASAP so that the fewest possible + * threads check the tail. 
+ */ + + while (1) { + if ((curr_cell = HA_ATOMIC_LOAD(ring_queue_ptr)) != &cell) + goto wait_for_flush; + __ha_cpu_relax_for_read(); + +#if !defined(__ARM_FEATURE_ATOMICS) + /* ARMv8.1-a has a true atomic OR and doesn't need the preliminary read */ + if ((tail_ofs = HA_ATOMIC_LOAD(tail_ptr)) & RING_TAIL_LOCK) { + __ha_cpu_relax_for_read(); + continue; + } +#endif + /* OK the queue is locked, let's attempt to get the tail lock */ + tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK); + + /* did we get it ? */ + if (!(tail_ofs & RING_TAIL_LOCK)) { + /* Here we own the tail. We can go on if we're still the leader, + * which we'll confirm by trying to reset the queue. If we're + * still the leader, we're done. + */ + if (HA_ATOMIC_CAS(ring_queue_ptr, &curr_cell, NULL)) + break; // Won! + + /* oops, no, let's give it back to another thread and wait. + * This does not happen often enough to warrant more complex + * approaches (tried already). + */ + HA_ATOMIC_STORE(tail_ptr, tail_ofs); + goto wait_for_flush; + } + __ha_cpu_relax_for_read(); + } + + head_ofs = HA_ATOMIC_LOAD(&ring->storage->head); + + /* this is the byte before tail, it contains the users count */ + lock_ptr = (uint8_t*)ring_area + (tail_ofs > 0 ? tail_ofs - 1 : ring_size - 1); + + /* Take the lock on the area. We're guaranteed to be the only writer + * here. + */ + readers = HA_ATOMIC_XCHG(lock_ptr, RING_WRITING_SIZE); + + vp_ring_to_data(&v1, &v2, ring_area, ring_size, head_ofs, tail_ofs); + + while (vp_size(v1, v2) > ring_size - needed - 1 - 1) { /* we need to delete the oldest message (from the end), * and we have to stop if there's a reader stuck there. * Unless there's corruption in the buffer it's guaranteed @@ -212,50 +328,142 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz * varint-encoded length (1 byte min) and the message * payload (0 bytes min). 
*/ - if (*b_head(buf)) - goto done_buf; - dellenlen = b_peek_varint(buf, 1, &dellen); + if (*_vp_head(v1, v2)) + break; + dellenlen = vp_peek_varint_ofs(v1, v2, 1, &dellen); if (!dellenlen) - goto done_buf; - BUG_ON(b_data(buf) < 1 + dellenlen + dellen); - - b_del(buf, 1 + dellenlen + dellen); + break; + BUG_ON_HOT(vp_size(v1, v2) < 1 + dellenlen + dellen); + vp_skip(&v1, &v2, 1 + dellenlen + dellen); } - /* OK now we do have room */ - __b_put_varint(buf, totlen); + /* now let's update the buffer with the new tail if our message will fit */ + new_tail_ofs = tail_ofs; + if (vp_size(v1, v2) <= ring_size - needed - 1 - 1) { + vp_data_to_ring(v1, v2, ring_area, ring_size, &head_ofs, &tail_ofs); + + /* update the new space in the buffer */ + HA_ATOMIC_STORE(&ring->storage->head, head_ofs); - totlen = 0; - for (i = 0; i < npfx; i++) { - size_t len = pfx[i].len; + /* calculate next tail pointer */ + new_tail_ofs += needed; + if (new_tail_ofs >= ring_size) + new_tail_ofs -= ring_size; - if (len + totlen > maxlen) - len = maxlen - totlen; - if (len) - __b_putblk(buf, pfx[i].ptr, len); - totlen += len; + /* reset next read counter before releasing writers */ + HA_ATOMIC_STORE(ring_area + (new_tail_ofs > 0 ? new_tail_ofs - 1 : ring_size - 1), 0); + } + else { + /* release readers right now, before writing the tail, so as + * not to expose the readers count byte to another writer. + */ + HA_ATOMIC_STORE(lock_ptr, readers); } - for (i = 0; i < nmsg; i++) { - size_t len = msg[i].len; + /* and release other writers */ + HA_ATOMIC_STORE(tail_ptr, new_tail_ofs); + + vp_ring_to_room(&v1, &v2, ring_area, ring_size, (new_tail_ofs > 0 ? 
new_tail_ofs - 1 : ring_size - 1), tail_ofs); + + if (likely(tail_ofs != new_tail_ofs)) { + /* the list stops on a NULL */ + for (curr_cell = &cell; curr_cell; curr_cell = HA_ATOMIC_LOAD(&curr_cell->next)) { + maxlen = curr_cell->maxlen; + pfx = curr_cell->pfx; + npfx = curr_cell->npfx; + msg = curr_cell->msg; + nmsg = curr_cell->nmsg; + + /* let's write the message size */ + vp_put_varint(&v1, &v2, maxlen); + + /* then write the messages */ + msglen = 0; + for (i = 0; i < npfx; i++) { + size_t len = pfx[i].len; + + if (len + msglen > maxlen) + len = maxlen - msglen; + if (len) + vp_putblk(&v1, &v2, pfx[i].ptr, len); + msglen += len; + } + + for (i = 0; i < nmsg; i++) { + size_t len = msg[i].len; + + if (len + msglen > maxlen) + len = maxlen - msglen; + if (len) + vp_putblk(&v1, &v2, msg[i].ptr, len); + msglen += len; + } + + /* for all but the last message we need to write the + * readers count byte. + */ + if (curr_cell->next) + vp_putchr(&v1, &v2, 0); + } + + /* now release */ + for (curr_cell = &cell; curr_cell; curr_cell = next_cell) { + next_cell = HA_ATOMIC_LOAD(&curr_cell->next); + _HA_ATOMIC_STORE(&curr_cell->next, curr_cell); + } - if (len + totlen > maxlen) - len = maxlen - totlen; - if (len) - __b_putblk(buf, msg[i].ptr, len); - totlen += len; + /* unlock the message area */ + HA_ATOMIC_STORE(lock_ptr, readers); + } else { + /* messages were dropped, notify about this and release them */ + for (curr_cell = &cell; curr_cell; curr_cell = next_cell) { + next_cell = HA_ATOMIC_LOAD(&curr_cell->next); + HA_ATOMIC_STORE(&curr_cell->to_send_self, 0); + _HA_ATOMIC_STORE(&curr_cell->next, curr_cell); + } } - *b_tail(buf) = 0; buf->data++; // new read counter - sent = lenlen + totlen + 1; + /* we must not write the trailing read counter, it was already done, + * plus we could ruin the one of the next writer. And the front was + * unlocked either at the top if the ring was full, or just above if it + * could be properly filled. 
+ */ + + sent = cell.to_send_self; /* notify potential readers */ - list_for_each_entry(appctx, &ring->waiters, wait_entry) - appctx_wakeup(appctx); + if (sent && HA_ATOMIC_LOAD(&ring->readers_count)) { + HA_ATOMIC_INC(&ring->pending); + while (HA_ATOMIC_LOAD(&ring->pending) && HA_ATOMIC_XCHG(&ring->waking, 1) == 0) { + struct mt_list *elt1, elt2; + struct appctx *appctx; + + HA_ATOMIC_STORE(&ring->pending, 0); + mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + appctx_wakeup(appctx); + HA_ATOMIC_STORE(&ring->waking, 0); + } + } - done_buf: - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + leave: return sent; + + wait_for_flush: + /* if we arrive here, it means we found another leader */ + + /* The leader will write our own pointer in the cell's next to + * mark it as released. Let's wait for this. + */ + do { + next_cell = HA_ATOMIC_LOAD(&cell.next); + } while (next_cell != &cell && __ha_cpu_relax_for_read()); + + /* OK our message was queued. Retrieving the sent size in the ring cell + * allows another leader thread to zero it if it finally couldn't send + * it (should only happen when using too small ring buffers to store + * all competing threads' messages at once). + */ + return HA_ATOMIC_LOAD(&cell.to_send_self); } /* Tries to attach appctx <appctx> as a new reader on ring <ring>. 
This is @@ -270,7 +478,7 @@ int ring_attach(struct ring *ring) int users = ring->readers_count; do { - if (users >= 255) + if (users >= RING_MAX_READERS) return 0; } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1)); return 1; @@ -285,20 +493,22 @@ void ring_detach_appctx(struct ring *ring, struct appctx *appctx, size_t ofs) if (!ring) return; - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + HA_ATOMIC_DEC(&ring->readers_count); + if (ofs != ~0) { /* reader was still attached */ - if (ofs < b_head_ofs(&ring->buf)) - ofs += b_size(&ring->buf) - b_head_ofs(&ring->buf); - else - ofs -= b_head_ofs(&ring->buf); - - BUG_ON(ofs >= b_size(&ring->buf)); - LIST_DEL_INIT(&appctx->wait_entry); - HA_ATOMIC_DEC(b_peek(&ring->buf, ofs)); + uint8_t *area = (uint8_t *)ring_area(ring); + uint8_t readers; + + BUG_ON(ofs >= ring_size(ring)); + MT_LIST_DELETE(&appctx->wait_entry); + + /* dec readers count */ + do { + readers = _HA_ATOMIC_LOAD(area + ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(area + ofs, &readers, readers - 1)) && __ha_cpu_relax()); } - HA_ATOMIC_DEC(&ring->readers_count); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); } /* Tries to attach CLI handler <appctx> as a new reader on ring <ring>. This is @@ -313,7 +523,7 @@ int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags) if (!ring_attach(ring)) return cli_err(appctx, - "Sorry, too many watchers (255) on this ring buffer. " + "Sorry, too many watchers (" TOSTR(RING_MAX_READERS) ") on this ring buffer. " "What could it have so interesting to attract so many watchers ?"); if (!appctx->io_handler) @@ -328,36 +538,29 @@ int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags) return 0; } -/* This function dumps all events from the ring whose pointer is in <p0> into - * the appctx's output buffer, and takes from <o0> the seek offset into the - * buffer's history (0 for oldest known event). 
It looks at <i0> for boolean - * options: bit0 means it must wait for new data or any key to be pressed. Bit1 - * means it must seek directly to the end to wait for new contents. It returns - * 0 if the output buffer or events are missing is full and it needs to be - * called again, otherwise non-zero. It is meant to be used with - * cli_release_show_ring() to clean up. + +/* parses as many messages as possible from ring <ring>, starting at the offset + * stored at *ofs_ptr, with RING_WF_* flags in <flags>, and passes them to + * the message handler <msg_handler>. If <last_of_ptr> is not NULL, a copy of + * the last known tail pointer will be copied there so that the caller may use + * this to detect new data have arrived since we left the function. Returns 0 + * if it needs to pause, 1 once finished. */ -int cli_io_handler_show_ring(struct appctx *appctx) +int ring_dispatch_messages(struct ring *ring, void *ctx, size_t *ofs_ptr, size_t *last_ofs_ptr, uint flags, + ssize_t (*msg_handler)(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len)) { - struct show_ring_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); - struct ring *ring = ctx->ring; - struct buffer *buf = &ring->buf; - size_t ofs; - size_t last_ofs; + size_t head_ofs, tail_ofs, prev_ofs; + size_t ring_size; + uint8_t *ring_area; + struct ist v1, v2; uint64_t msg_len; size_t len, cnt; + ssize_t copied; + uint8_t readers; int ret; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); + ring_area = (uint8_t *)ring->storage->area; + ring_size = ring->storage->size; /* explanation for the initialization below: it would be better to do * this in the parsing function but this would occasionally result in @@ -365,59 +568,134 @@ int 
cli_io_handler_show_ring(struct appctx *appctx) * and keep it while being scheduled. Thus instead let's take it the * first time we enter here so that we have a chance to pass many * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. + * value cannot be produced after initialization. The first offset + * needs to be taken under isolation as it must not move while we're + * trying to catch it. */ - if (unlikely(ctx->ofs == ~0)) { - /* going to the end means looking at tail-1 */ - ctx->ofs = b_peek_ofs(buf, (ctx->flags & RING_WF_SEEK_NEW) ? b_data(buf) - 1 : 0); - HA_ATOMIC_INC(b_orig(buf) + ctx->ofs); + if (unlikely(*ofs_ptr == ~0)) { + thread_isolate(); + + head_ofs = HA_ATOMIC_LOAD(&ring->storage->head); + tail_ofs = ring_tail(ring); + + if (flags & RING_WF_SEEK_NEW) { + /* going to the end means looking at tail-1 */ + head_ofs = tail_ofs + ring_size - 1; + if (head_ofs >= ring_size) + head_ofs -= ring_size; + } + + /* reserve our slot here (inc readers count) */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + head_ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + head_ofs, &readers, readers + 1)) && __ha_cpu_relax()); + + thread_release(); + + /* store this precious offset in our context, and we're done */ + *ofs_ptr = head_ofs; } - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. + /* we have the guarantee we can restart from our own head */ + head_ofs = *ofs_ptr; + BUG_ON(head_ofs >= ring_size); + + /* the tail will continue to move but we're getting a safe value + * here that will continue to work. */ - ofs = ctx->ofs - b_head_ofs(buf); - if (ctx->ofs < b_head_ofs(buf)) - ofs += b_size(buf); + tail_ofs = ring_tail(ring); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); + /* we keep track of where we were and we don't release it before + * we've protected the next place. 
+ */ + prev_ofs = head_ofs; - /* in this loop, ofs always points to the counter byte that precedes + /* in this loop, head_ofs always points to the counter byte that precedes * the message so that we can take our reference there if we have to - * stop before the end (ret=0). + * stop before the end (ret=0). The reference is relative to the ring's + * origin, while pos is relative to the ring's head. */ ret = 1; - while (ofs + 1 < b_data(buf)) { + vp_ring_to_data(&v1, &v2, (char *)ring_area, ring_size, head_ofs, tail_ofs); + + while (1) { + if (vp_size(v1, v2) <= 1) { + /* no more data */ + break; + } + + readers = _HA_ATOMIC_LOAD(_vp_addr(v1, v2, 0)); + if (readers > RING_MAX_READERS) { + /* we just met a writer which hasn't finished */ + break; + } + cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); + len = vp_peek_varint_ofs(v1, v2, cnt, &msg_len); if (!len) break; cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - if (unlikely(msg_len + 1 > b_size(&trash))) { + BUG_ON(msg_len + cnt + 1 > vp_size(v1, v2)); + + copied = msg_handler(ctx, v1, v2, cnt, msg_len); + if (copied == -2) { /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; + goto skip; } - - chunk_reset(&trash); - len = b_getblk(buf, trash.area, msg_len, ofs + cnt); - trash.data += len; - trash.area[trash.data++] = '\n'; - - if (applet_putchk(appctx, &trash) == -1) { + else if (copied == -1) { + /* output full */ ret = 0; break; } - ofs += cnt + msg_len; + skip: + vp_skip(&v1, &v2, cnt + msg_len); + } + + vp_data_to_ring(v1, v2, (char *)ring_area, ring_size, &head_ofs, &tail_ofs); + + if (head_ofs != prev_ofs) { + /* inc readers count on new place */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + head_ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + head_ofs, &readers, readers + 1)) && __ha_cpu_relax()); + + /* dec readers count on old place */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + prev_ofs); + } while ((readers 
> RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + prev_ofs, &readers, readers - 1)) && __ha_cpu_relax()); } - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - ctx->ofs = b_peek_ofs(buf, ofs); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + if (last_ofs_ptr) + *last_ofs_ptr = tail_ofs; + *ofs_ptr = head_ofs; + return ret; +} + +/* This function dumps all events from the ring whose pointer is in <p0> into + * the appctx's output buffer, and takes from <o0> the seek offset into the + * buffer's history (0 for oldest known event). It looks at <i0> for boolean + * options: bit0 means it must wait for new data or any key to be pressed. Bit1 + * means it must seek directly to the end to wait for new contents. It returns + * 0 if the output buffer or events are missing is full and it needs to be + * called again, otherwise non-zero. It is meant to be used with + * cli_release_show_ring() to clean up. + */ +int cli_io_handler_show_ring(struct appctx *appctx) +{ + struct show_ring_ctx *ctx = appctx->svcctx; + struct stconn *sc = appctx_sc(appctx); + struct ring *ring = ctx->ring; + size_t last_ofs; + size_t ofs; + int ret; + + MT_LIST_DELETE(&appctx->wait_entry); + + ret = ring_dispatch_messages(ring, appctx, &ctx->ofs, &last_ofs, ctx->flags, applet_append_line); if (ret && (ctx->flags & RING_WF_WAIT_MODE)) { /* we've drained everything and are configured to wait for more @@ -425,10 +703,8 @@ int cli_io_handler_show_ring(struct appctx *appctx) */ if (!sc_oc(sc)->output && !(sc->flags & SC_FL_SHUT_DONE)) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(&ring->buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -467,13 +743,41 @@ size_t 
ring_max_payload(const struct ring *ring) size_t max; /* initial max = bufsize - 1 (initial RC) - 1 (payload RC) */ - max = b_size(&ring->buf) - 1 - 1; + max = ring_size(ring) - 1 - 1; /* subtract payload VI (varint-encoded size) */ max -= varint_bytes(max); return max; } +/* config parser for global "tune.ring.queues", accepts a number from 0 to RING_WAIT_QUEUES */ +static int cfg_parse_tune_ring_queues(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int queues; + + if (too_many_args(1, args, err, NULL)) + return -1; + + queues = atoi(args[1]); + if (queues < 0 || queues > RING_WAIT_QUEUES) { + memprintf(err, "'%s' expects a number between 0 and %d but got '%s'.", args[0], RING_WAIT_QUEUES, args[1]); + return -1; + } + + global.tune.ring_queues = queues; + return 0; +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.ring.queues", cfg_parse_tune_ring_queues }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + /* * Local variables: * c-indent-level: 8 diff --git a/src/sample.c b/src/sample.c index 89de612..3e5b576 100644 --- a/src/sample.c +++ b/src/sample.c @@ -61,6 +61,21 @@ const char *smp_to_type[SMP_TYPES] = { [SMP_T_METH] = "meth", }; +/* Returns SMP_T_* smp matching with <type> name or SMP_TYPES if + * not found. 
+ */ +int type_to_smp(const char *type) +{ + int it = 0; + + while (it < SMP_TYPES) { + if (strcmp(type, smp_to_type[it]) == 0) + break; // found + it += 1; + } + return it; +} + /* static sample used in sample_process() when <p> is NULL */ static THREAD_LOCAL struct sample temp_smp; @@ -3803,7 +3818,7 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void while (grpc_left > GRPC_MSG_HEADER_SZ) { size_t grpc_msg_len, left; - grpc_msg_len = left = ntohl(*(uint32_t *)(pos + GRPC_MSG_COMPRESS_FLAG_SZ)); + grpc_msg_len = left = ntohl(read_u32(pos + GRPC_MSG_COMPRESS_FLAG_SZ)); pos += GRPC_MSG_HEADER_SZ; grpc_left -= GRPC_MSG_HEADER_SZ; @@ -4766,30 +4781,58 @@ static int smp_check_uuid(struct arg *args, char **err) if (!args[0].type) { args[0].type = ARGT_SINT; args[0].data.sint = 4; - } - else if (args[0].data.sint != 4) { - memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint); - return 0; + } else { + switch (args[0].data.sint) { + case 4: + case 7: + break; + default: + memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint); + return 0; + } } return 1; } -// Generate a RFC4122 UUID (default is v4 = fully random) +// Generate a RFC 9562 UUID (default is v4 = fully random) static int smp_fetch_uuid(const struct arg *args, struct sample *smp, const char *kw, void *private) { - if (args[0].data.sint == 4 || !args[0].type) { - ha_generate_uuid(&trash); - smp->data.type = SMP_T_STR; - smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE; - smp->data.u.str = trash; - return 1; + long long int type = -1; + + if (!args[0].type) { + type = 4; + } else { + type = args[0].data.sint; } - // more implementations of other uuid formats possible here - return 0; + switch (type) { + case 4: + ha_generate_uuid_v4(&trash); + break; + case 7: + ha_generate_uuid_v7(&trash); + break; + default: + return 0; + } + + smp->data.type = SMP_T_STR; + smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE; + smp->data.u.str = trash; + return 1; +} + 
+/* returns the uptime in seconds */ +static int +smp_fetch_uptime(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + smp->data.type = SMP_T_SINT; + smp->data.u.sint = ns_to_sec(now_ns - start_time_ns); + return 1; } + /* Check if QUIC support was compiled and was not disabled by "no-quic" global option */ static int smp_fetch_quic_enabled(const struct arg *args, struct sample *smp, const char *kw, void *private) { @@ -4915,6 +4958,30 @@ error: return 0; } +/* Server conn queueing infos - bc_{be,srv}_queue */ +static int smp_fetch_conn_queues(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct strm_logs *logs; + + if (!smp->strm) + return 0; + + smp->data.type = SMP_T_SINT; + smp->flags = 0; + + logs = &smp->strm->logs; + + if (kw[3] == 'b') { + /* bc_be_queue */ + smp->data.u.sint = logs->prx_queue_pos; + } + else { + /* bc_srv_queue */ + smp->data.u.sint = logs->srv_queue_pos; + } + return 1; +} + /* Timing events {f,bc}.timer. 
*/ static int smp_fetch_conn_timers(const struct arg *args, struct sample *smp, const char *kw, void *private) { @@ -5029,6 +5096,9 @@ static struct sample_fetch_kw_list smp_logs_kws = {ILH, { { "txn.timer.user", smp_fetch_txn_timers, 0, NULL, SMP_T_SINT, SMP_USE_TXFIN }, /* "Tu" */ { "bc.timer.connect", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "Tc" */ + { "bc_be_queue", smp_fetch_conn_queues, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "bq" */ + { "bc_srv_queue", smp_fetch_conn_queues, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "sq" */ + { "fc.timer.handshake", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, /* "Th" */ { "fc.timer.total", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_SSFIN }, /* "Tt" */ @@ -5063,6 +5133,7 @@ static struct sample_fetch_kw_list smp_kws = {ILH, { { "thread", smp_fetch_thread, 0, NULL, SMP_T_SINT, SMP_USE_CONST }, { "rand", smp_fetch_rand, ARG1(0,SINT), NULL, SMP_T_SINT, SMP_USE_CONST }, { "stopping", smp_fetch_stopping, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN }, + { "uptime", smp_fetch_uptime, 0, NULL, SMP_T_SINT, SMP_USE_CONST }, { "uuid", smp_fetch_uuid, ARG1(0, SINT), smp_check_uuid, SMP_T_STR, SMP_USE_CONST }, { "cpu_calls", smp_fetch_cpu_calls, 0, NULL, SMP_T_SINT, SMP_USE_INTRN }, diff --git a/src/server.c b/src/server.c index 9196fac..caf2f40 100644 --- a/src/server.c +++ b/src/server.c @@ -28,6 +28,7 @@ #include <haproxy/dict-t.h> #include <haproxy/errors.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/log.h> #include <haproxy/mailers.h> #include <haproxy/namespace.h> @@ -140,18 +141,10 @@ const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause) int srv_downtime(const struct server *s) { - if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= ns_to_sec(now_ns)) // ignore negative time + if ((s->cur_state != SRV_ST_STOPPED) || s->counters.last_change >= ns_to_sec(now_ns)) // ignore negative time return s->down_time; - return ns_to_sec(now_ns) - 
s->last_change + s->down_time; -} - -int srv_lastsession(const struct server *s) -{ - if (s->counters.last_sess) - return ns_to_sec(now_ns) - s->counters.last_sess; - - return -1; + return ns_to_sec(now_ns) - s->counters.last_change + s->down_time; } int srv_getinter(const struct check *check) @@ -170,7 +163,7 @@ int srv_getinter(const struct check *check) /* Update server's addr:svc_port tuple in INET context * - * Must be called under thread isolation to ensure consistent readings accross + * Must be called under thread isolation to ensure consistent readings across * all threads (addr:svc_port might be read without srv lock being held). */ static void _srv_set_inetaddr_port(struct server *srv, @@ -184,6 +177,11 @@ static void _srv_set_inetaddr_port(struct server *srv, else srv->flags &= ~SRV_F_MAPPORTS; + if (srv->proxy->lbprm.update_server_eweight) { + /* some balancers (chash in particular) may use the addr in their routing decisions */ + srv->proxy->lbprm.update_server_eweight(srv); + } + if (srv->log_target && srv->log_target->type == LOG_TARGET_DGRAM) { /* server is used as a log target, manually update log target addr for DGRAM */ ipcpy(addr, srv->log_target->addr); @@ -268,7 +266,7 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne px = proxy_find_by_id(data->server.safe.proxy_uuid, PR_CAP_BE, 0); if (!px) continue; - srv = findserver_unique_id(px, data->server.safe.puid, data->server.safe.rid); + srv = server_find_by_id_unique(px, data->server.safe.puid, data->server.safe.rid); if (!srv) continue; @@ -295,7 +293,7 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne /* * this requires thread isolation, which is safe since we're the only * task working for the current subscription and we don't hold locks - * or ressources that other threads may depend on to complete a running + * or resources that other threads may depend on to complete a running * cycle. 
Note that we do this way because we assume that this event is * rather rare. */ @@ -306,9 +304,24 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne _srv_set_inetaddr_port(srv, &new_addr, data->safe.next.port.svc, data->safe.next.port.map); - /* propagate the changes */ - if (data->safe.purge_conn) /* force connection cleanup on the given server? */ - srv_cleanup_connections(srv); + /* propagate the changes, force connection cleanup */ + if (new_addr.ss_family != AF_UNSPEC && + (srv->next_admin & SRV_ADMF_RMAINT)) { + /* server was previously put under DNS maintenance due + * to DNS error, but addr resolves again, so we must + * put it out of maintenance + */ + srv_clr_admin_flag(srv, SRV_ADMF_RMAINT); + + /* thanks to valid DNS resolution? */ + if (data->safe.updater.dns) { + chunk_reset(&trash); + chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer", srv->proxy->id, srv->id); + ha_warning("%s.\n", trash.area); + send_log(srv->proxy, LOG_NOTICE, "%s.\n", trash.area); + } + } + srv_cleanup_connections(srv); srv_set_dyncookie(srv); srv_set_addr_desc(srv, 1); } @@ -437,47 +450,25 @@ void _srv_event_hdl_prepare_state(struct event_hdl_cb_data_server_state *cb_data */ static void _srv_event_hdl_prepare_inetaddr(struct event_hdl_cb_data_server_inetaddr *cb_data, struct server *srv, - const struct sockaddr_storage *next_addr, - unsigned int next_port, uint8_t next_mapports, - uint8_t purge_conn) + const struct server_inetaddr *next_inetaddr, + struct server_inetaddr_updater updater) { - struct sockaddr_storage *prev_addr = &srv->addr; - unsigned int prev_port = srv->svc_port; - uint8_t prev_mapports = !!(srv->flags & SRV_F_MAPPORTS); + struct server_inetaddr prev_inetaddr; + + server_get_inetaddr(srv, &prev_inetaddr); /* only INET families are supported */ - BUG_ON((prev_addr->ss_family != AF_UNSPEC && - prev_addr->ss_family != AF_INET && prev_addr->ss_family != AF_INET6) || - (next_addr->ss_family != 
AF_UNSPEC && - next_addr->ss_family != AF_INET && next_addr->ss_family != AF_INET6)); + BUG_ON((next_inetaddr->family != AF_UNSPEC && + next_inetaddr->family != AF_INET && next_inetaddr->family != AF_INET6)); /* prev */ - cb_data->safe.prev.family = prev_addr->ss_family; - memset(&cb_data->safe.prev.addr, 0, sizeof(cb_data->safe.prev.addr)); - if (prev_addr->ss_family == AF_INET) - cb_data->safe.prev.addr.v4.s_addr = - ((struct sockaddr_in *)prev_addr)->sin_addr.s_addr; - else if (prev_addr->ss_family == AF_INET6) - memcpy(&cb_data->safe.prev.addr.v6, - &((struct sockaddr_in6 *)prev_addr)->sin6_addr, - sizeof(struct in6_addr)); - cb_data->safe.prev.port.svc = prev_port; - cb_data->safe.prev.port.map = prev_mapports; + cb_data->safe.prev = prev_inetaddr; /* next */ - cb_data->safe.next.family = next_addr->ss_family; - memset(&cb_data->safe.next.addr, 0, sizeof(cb_data->safe.next.addr)); - if (next_addr->ss_family == AF_INET) - cb_data->safe.next.addr.v4.s_addr = - ((struct sockaddr_in *)next_addr)->sin_addr.s_addr; - else if (next_addr->ss_family == AF_INET6) - memcpy(&cb_data->safe.next.addr.v6, - &((struct sockaddr_in6 *)next_addr)->sin6_addr, - sizeof(struct in6_addr)); - cb_data->safe.next.port.svc = next_port; - cb_data->safe.next.port.map = next_mapports; + cb_data->safe.next = *next_inetaddr; - cb_data->safe.purge_conn = purge_conn; + /* updater */ + cb_data->safe.updater = updater; } /* server event publishing helper: publish in both global and @@ -900,11 +891,6 @@ static int srv_parse_disabled(char **args, int *cur_arg, static int srv_parse_enabled(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) { - if (newsrv->flags & SRV_F_DYNAMIC) { - ha_warning("Keyword 'enabled' is ignored for dynamic servers. 
It will be rejected from 3.0 onward."); - return 0; - } - newsrv->next_admin &= ~SRV_ADMF_CMAINT & ~SRV_ADMF_FMAINT; newsrv->next_state = SRV_ST_RUNNING; newsrv->check.state &= ~CHK_ST_PAUSED; @@ -933,6 +919,28 @@ static int srv_parse_error_limit(char **args, int *cur_arg, return 0; } +/* Parse the "guid" keyword */ +static int srv_parse_guid(char **args, int *cur_arg, + struct proxy *curproxy, struct server *newsrv, char **err) +{ + const char *guid; + char *guid_err = NULL; + + if (!*args[*cur_arg + 1]) { + memprintf(err, "'%s' : expects an argument", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + guid = args[*cur_arg + 1]; + if (guid_insert(&newsrv->obj_type, guid, &guid_err)) { + memprintf(err, "'%s': %s", args[*cur_arg], guid_err); + ha_free(&guid_err); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + /* Parse the "ws" keyword */ static int srv_parse_ws(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) @@ -960,6 +968,32 @@ static int srv_parse_ws(char **args, int *cur_arg, return 0; } +/* Parse the "hash-key" server keyword */ +static int srv_parse_hash_key(char **args, int *cur_arg, + struct proxy *curproxy, struct server *newsrv, char **err) +{ + if (!args[*cur_arg + 1]) { + memprintf(err, "'%s expects 'id', 'addr', or 'addr-port' value", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + if (strcmp(args[*cur_arg + 1], "id") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ID; + } + else if (strcmp(args[*cur_arg + 1], "addr") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ADDR; + } + else if (strcmp(args[*cur_arg + 1], "addr-port") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ADDR_PORT; + } + else { + memprintf(err, "'%s' has to be 'id', 'addr', or 'addr-port'", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + /* Parse the "init-addr" server keyword */ static int srv_parse_init_addr(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) @@ -1119,6 +1153,26 @@ 
static int srv_parse_pool_purge_delay(char **args, int *cur_arg, struct proxy *c return 0; } +static int srv_parse_pool_conn_name(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) +{ + char *arg; + + arg = args[*cur_arg + 1]; + if (!*arg) { + memprintf(err, "'%s' expects <value> as argument", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + ha_free(&newsrv->pool_conn_name); + newsrv->pool_conn_name = strdup(arg); + if (!newsrv->pool_conn_name) { + memprintf(err, "'%s' : out of memory", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + static int srv_parse_pool_low_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) { char *arg; @@ -1199,6 +1253,7 @@ static int srv_parse_namespace(char **args, int *cur_arg, if (strcmp(arg, "*") == 0) { /* Use the namespace associated with the connection (if present). */ newsrv->flags |= SRV_F_USE_NS_FROM_PP; + global.last_checks |= LSTCHK_SYSADM; return 0; } @@ -1217,6 +1272,7 @@ static int srv_parse_namespace(char **args, int *cur_arg, memprintf(err, "Cannot open namespace '%s'", arg); return ERR_ALERT | ERR_FATAL; } + global.last_checks |= LSTCHK_SYSADM; return 0; #else @@ -2230,9 +2286,11 @@ static struct srv_kw_list srv_kws = { "ALL", { }, { { "backup", srv_parse_backup, 0, 1, 1 }, /* Flag as backup server */ { "cookie", srv_parse_cookie, 1, 1, 1 }, /* Assign a cookie to the server */ { "disabled", srv_parse_disabled, 0, 1, 1 }, /* Start the server in 'disabled' state */ - { "enabled", srv_parse_enabled, 0, 1, 1 }, /* Start the server in 'enabled' state */ + { "enabled", srv_parse_enabled, 0, 1, 0 }, /* Start the server in 'enabled' state */ { "error-limit", srv_parse_error_limit, 1, 1, 1 }, /* Configure the consecutive count of check failures to consider a server on error */ + { "guid", srv_parse_guid, 1, 0, 1 }, /* Set global unique ID of the server */ { "ws", srv_parse_ws, 1, 1, 1 }, /* websocket protocol */ + { 
"hash-key", srv_parse_hash_key, 1, 1, 1 }, /* Configure how chash keys are computed */ { "id", srv_parse_id, 1, 0, 1 }, /* set id# of server */ { "init-addr", srv_parse_init_addr, 1, 1, 0 }, /* */ { "log-bufsize", srv_parse_log_bufsize, 1, 1, 0 }, /* Set the ring bufsize for log server (only for log backends) */ @@ -2251,6 +2309,7 @@ static struct srv_kw_list srv_kws = { "ALL", { }, { { "on-error", srv_parse_on_error, 1, 1, 1 }, /* Configure the action on check failure */ { "on-marked-down", srv_parse_on_marked_down, 1, 1, 1 }, /* Configure the action when a server is marked down */ { "on-marked-up", srv_parse_on_marked_up, 1, 1, 1 }, /* Configure the action when a server is marked up */ + { "pool-conn-name", srv_parse_pool_conn_name, 1, 1, 1 }, /* Define expression to identify connections in idle pool */ { "pool-low-conn", srv_parse_pool_low_conn, 1, 1, 1 }, /* Set the min number of orphan idle connecbefore being allowed to pick from other threads */ { "pool-max-conn", srv_parse_pool_max_conn, 1, 1, 1 }, /* Set the max number of orphan idle connections, -1 means unlimited */ { "pool-purge-delay", srv_parse_pool_purge_delay, 1, 1, 1 }, /* Set the time before we destroy orphan idle connections, defaults to 1s */ @@ -2290,17 +2349,19 @@ void server_recalc_eweight(struct server *sv, int must_update) struct proxy *px = sv->proxy; unsigned w; - if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) { - /* go to full throttle if the slowstart interval is reached */ - if (sv->next_state == SRV_ST_STARTING) + if (ns_to_sec(now_ns) < sv->counters.last_change || ns_to_sec(now_ns) >= sv->counters.last_change + sv->slowstart) { + /* go to full throttle if the slowstart interval is reached unless server is currently down */ + if ((sv->cur_state != SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING)) sv->next_state = SRV_ST_RUNNING; } /* We must take care of not pushing the server to full throttle during slow starts. 
* It must also start immediately, at least at the minimal step when leaving maintenance. */ - if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) - w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart; + if ((sv->cur_state == SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) + w = 1; + else if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) + w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->counters.last_change) + sv->slowstart) / sv->slowstart; else w = px->lbprm.wdiv; @@ -2334,7 +2395,7 @@ const char *server_parse_weight_change_request(struct server *sv, w = strtol(weight_str, &end, 10); if (end == weight_str) - return "Empty weight string empty or preceded by garbage"; + return "Empty weight string empty or preceded by garbage\n"; else if (end[0] == '%' && end[1] == '\0') { if (w < 0) return "Relative weight must be positive.\n"; @@ -2348,7 +2409,7 @@ const char *server_parse_weight_change_request(struct server *sv, else if (w < 0 || w > 256) return "Absolute weight can only be between 0 and 256 inclusive.\n"; else if (end[0] != '\0') - return "Trailing garbage in weight string"; + return "Trailing garbage in weight string\n"; if (w && w != sv->iweight && !(px->lbprm.algo & BE_LB_PROP_DYN)) return "Backend is using a static LB algorithm and only accepts weights '0%' and '100%'.\n"; @@ -2360,32 +2421,6 @@ const char *server_parse_weight_change_request(struct server *sv, } /* - * Parses <addr_str> and configures <sv> accordingly. <from> precise - * the source of the change in the associated message log. - * Returns: - * - error string on error - * - NULL on success - * - * Must be called with the server lock held. 
- */ -const char *server_parse_addr_change_request(struct server *sv, - const char *addr_str, const char *updater) -{ - unsigned char ip[INET6_ADDRSTRLEN]; - - if (inet_pton(AF_INET6, addr_str, ip)) { - srv_update_addr(sv, ip, AF_INET6, updater); - return NULL; - } - if (inet_pton(AF_INET, addr_str, ip)) { - srv_update_addr(sv, ip, AF_INET, updater); - return NULL; - } - - return "Could not understand IP address format.\n"; -} - -/* * Must be called with the server lock held. */ const char *server_parse_maxconn_change_request(struct server *sv, @@ -2399,9 +2434,9 @@ const char *server_parse_maxconn_change_request(struct server *sv, v = strtol(maxconn_str, &end, 10); if (end == maxconn_str) - return "maxconn string empty or preceded by garbage"; + return "maxconn string empty or preceded by garbage\n"; else if (end[0] != '\0') - return "Trailing garbage in maxconn string"; + return "Trailing garbage in maxconn string\n"; if (sv->maxconn == sv->minconn) { // static maxconn sv->maxconn = sv->minconn = v; @@ -2415,42 +2450,56 @@ const char *server_parse_maxconn_change_request(struct server *sv, return NULL; } -static struct sample_expr *srv_sni_sample_parse_expr(struct server *srv, struct proxy *px, - const char *file, int linenum, char **err) +/* Interpret <expr> as sample expression. This function is reserved for + * internal server allocation. On parsing use parse_srv_expr() for extra sample + * check validity. + * + * Returns the allocated sample on success or NULL on error. 
+ */ +struct sample_expr *_parse_srv_expr(char *expr, struct arg_list *args_px, + const char *file, int linenum, char **err) { int idx; const char *args[] = { - srv->sni_expr, + expr, NULL, }; idx = 0; - px->conf.args.ctx = ARGC_SRV; + args_px->ctx = ARGC_SRV; - return sample_parse_expr((char **)args, &idx, file, linenum, err, &px->conf.args, NULL); + return sample_parse_expr((char **)args, &idx, file, linenum, err, args_px, NULL); } -int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err) +/* Interpret <str> if not empty as a sample expression and store it into <out>. + * Contrary to _parse_srv_expr(), fetch scope validity is checked to ensure it + * is valid on a server line context. It also updates <px> HTTP mode + * requirement depending on fetch method used. + * + * Returns 0 on success else non zero. + */ +static int parse_srv_expr(char *str, struct sample_expr **out, struct proxy *px, + char **err) { struct sample_expr *expr; - expr = srv_sni_sample_parse_expr(newsrv, px, px->conf.file, px->conf.line, err); - if (!expr) { - memprintf(err, "error detected while parsing sni expression : %s", *err); + if (!str) + return 0; + + expr = _parse_srv_expr(str, &px->conf.args, px->conf.file, px->conf.line, err); + if (!expr) return ERR_ALERT | ERR_FATAL; - } if (!(expr->fetch->val & SMP_VAL_BE_SRV_CON)) { - memprintf(err, "error detected while parsing sni expression : " - " fetch method '%s' extracts information from '%s', " + memprintf(err, "fetch method '%s' extracts information from '%s', " "none of which is available here.", - newsrv->sni_expr, sample_src_names(expr->fetch->use)); + str, sample_src_names(expr->fetch->use)); return ERR_ALERT | ERR_FATAL; } px->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY); - release_sample_expr(newsrv->ssl_ctx.sni); - newsrv->ssl_ctx.sni = expr; + release_sample_expr(*out); + *out = expr; return 0; } @@ -2634,6 +2683,45 @@ int srv_prepare_for_resolution(struct server *srv, const char *hostname) 
return -1; } +/* Initialize default values for <srv>. Used both for dynamic servers and + * default servers. The latter are not initialized via new_server(), hence this + * function purpose. For static servers, srv_settings_cpy() is used instead + * reusing their default server instance. + */ +void srv_settings_init(struct server *srv) +{ + srv->check.inter = DEF_CHKINTR; + srv->check.fastinter = 0; + srv->check.downinter = 0; + srv->check.rise = DEF_RISETIME; + srv->check.fall = DEF_FALLTIME; + srv->check.port = 0; + + srv->agent.inter = DEF_CHKINTR; + srv->agent.fastinter = 0; + srv->agent.downinter = 0; + srv->agent.rise = DEF_AGENT_RISETIME; + srv->agent.fall = DEF_AGENT_FALLTIME; + srv->agent.port = 0; + + srv->maxqueue = 0; + srv->minconn = 0; + srv->maxconn = 0; + + srv->max_reuse = -1; + srv->max_idle_conns = -1; + srv->pool_purge_delay = 5000; + + srv->slowstart = 0; + + srv->onerror = DEF_HANA_ONERR; + srv->consecutive_errors_limit = DEF_HANA_ERRLIMIT; + + srv->uweight = srv->iweight = 1; + + LIST_INIT(&srv->pp_tlvs); +} + /* * Copy <src> server settings to <srv> server allocating * everything needed. 
@@ -2704,6 +2792,7 @@ void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl srv->minconn = src->minconn; srv->maxconn = src->maxconn; srv->slowstart = src->slowstart; + srv->hash_key = src->hash_key; srv->observe = src->observe; srv->onerror = src->onerror; srv->onmarkeddown = src->onmarkeddown; @@ -2751,6 +2840,8 @@ void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl srv->tcp_ut = src->tcp_ut; #endif srv->mux_proto = src->mux_proto; + if (srv->pool_conn_name) + srv->pool_conn_name = strdup(srv->pool_conn_name); srv->pool_purge_delay = src->pool_purge_delay; srv->low_idle_conns = src->low_idle_conns; srv->max_idle_conns = src->max_idle_conns; @@ -2806,7 +2897,7 @@ struct server *new_server(struct proxy *proxy) srv->rid = 0; /* rid defaults to 0 */ srv->next_state = SRV_ST_RUNNING; /* early server setup */ - srv->last_change = ns_to_sec(now_ns); + srv->counters.last_change = ns_to_sec(now_ns); srv->check.obj_type = OBJ_TYPE_CHECK; srv->check.status = HCHK_STATUS_INI; @@ -2820,6 +2911,10 @@ struct server *new_server(struct proxy *proxy) srv->agent.proxy = proxy; srv->xprt = srv->check.xprt = srv->agent.xprt = xprt_get(XPRT_RAW); + MT_LIST_INIT(&srv->sess_conns); + + guid_init(&srv->guid); + srv->extra_counters = NULL; #ifdef USE_OPENSSL HA_RWLOCK_INIT(&srv->ssl_ctx.lock); @@ -2840,6 +2935,8 @@ void srv_take(struct server *srv) /* deallocate common server parameters (may be used by default-servers) */ void srv_free_params(struct server *srv) { + struct srv_pp_tlv_list *srv_tlv = NULL; + free(srv->cookie); free(srv->rdr_pfx); free(srv->hostname); @@ -2848,6 +2945,8 @@ void srv_free_params(struct server *srv) free(srv->per_thr); free(srv->per_tgrp); free(srv->curr_idle_thr); + free(srv->pool_conn_name); + release_sample_expr(srv->pool_conn_name_expr); free(srv->resolvers_id); free(srv->addr_node.key); free(srv->lb_nodes); @@ -2858,6 +2957,14 @@ void srv_free_params(struct server *srv) if (xprt_get(XPRT_SSL) && 
xprt_get(XPRT_SSL)->destroy_srv) xprt_get(XPRT_SSL)->destroy_srv(srv); + + while (!LIST_ISEMPTY(&srv->pp_tlvs)) { + srv_tlv = LIST_ELEM(srv->pp_tlvs.n, struct srv_pp_tlv_list *, list); + LIST_DEL_INIT(&srv_tlv->list); + lf_expr_deinit(&srv_tlv->fmt); + ha_free(&srv_tlv->fmt_string); + ha_free(&srv_tlv); + } } /* Deallocate a server <srv> and its member. <srv> must be allocated. For @@ -2882,6 +2989,8 @@ struct server *srv_drop(struct server *srv) if (HA_ATOMIC_SUB_FETCH(&srv->refcount, 1)) goto end; + guid_remove(&srv->guid); + /* make sure we are removed from our 'next->prev_deleted' list * This doesn't require full thread isolation as we're using mt lists * However this could easily be turned into regular list if required @@ -3018,6 +3127,12 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) int i; struct server *newsrv; + /* Set the first server's ID. */ + _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low); + srv->conf.name.key = srv->id; + ebis_insert(&curproxy->conf.used_server_name, &srv->conf.name); + + /* then create other servers from this one */ for (i = srv->tmpl_info.nb_low + 1; i <= srv->tmpl_info.nb_high; i++) { newsrv = new_server(px); if (!newsrv) @@ -3029,8 +3144,21 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) srv_settings_cpy(newsrv, srv, 1); srv_prepare_for_resolution(newsrv, srv->hostname); + /* Use sni as fallback if pool_conn_name isn't set */ + if (!newsrv->pool_conn_name && newsrv->sni_expr) { + newsrv->pool_conn_name = strdup(newsrv->sni_expr); + if (!newsrv->pool_conn_name) + goto err; + } + + if (newsrv->pool_conn_name) { + newsrv->pool_conn_name_expr = _parse_srv_expr(srv->pool_conn_name, &px->conf.args, NULL, 0, NULL); + if (!newsrv->pool_conn_name_expr) + goto err; + } + if (newsrv->sni_expr) { - newsrv->ssl_ctx.sni = srv_sni_sample_parse_expr(newsrv, px, NULL, 0, NULL); + newsrv->ssl_ctx.sni = _parse_srv_expr(srv->sni_expr, &px->conf.args, NULL, 0, NULL); 
if (!newsrv->ssl_ctx.sni) goto err; } @@ -3045,6 +3173,9 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) /* Linked backwards first. This will be restablished after parsing. */ newsrv->next = px->srv; px->srv = newsrv; + + newsrv->conf.name.key = newsrv->id; + ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); } _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low); @@ -3316,30 +3447,18 @@ static int _srv_parse_init(struct server **srv, char **args, int *cur_arg, /* Copy default server settings to new server */ srv_settings_cpy(newsrv, &curproxy->defsrv, 0); } else { - /* Initialize dynamic server weight to 1 */ - newsrv->uweight = newsrv->iweight = 1; + srv_settings_init(newsrv); /* A dynamic server is disabled on startup */ newsrv->next_admin = SRV_ADMF_FMAINT; newsrv->next_state = SRV_ST_STOPPED; server_recalc_eweight(newsrv, 0); - - /* Set default values for checks */ - newsrv->check.inter = DEF_CHKINTR; - newsrv->check.rise = DEF_RISETIME; - newsrv->check.fall = DEF_FALLTIME; - - newsrv->agent.inter = DEF_CHKINTR; - newsrv->agent.rise = DEF_AGENT_RISETIME; - newsrv->agent.fall = DEF_AGENT_FALLTIME; } HA_SPIN_INIT(&newsrv->lock); } else { *srv = newsrv = &curproxy->defsrv; *cur_arg = 1; - newsrv->resolv_opts.family_prio = AF_INET6; - newsrv->resolv_opts.accept_duplicate_ip = 0; } free(fqdn); @@ -3426,25 +3545,6 @@ out: return err_code; } -/* This function is first intended to be used through parse_server to - * initialize a new server on startup. - */ -static int _srv_parse_sni_expr_init(char **args, int cur_arg, - struct server *srv, struct proxy *proxy, - char **errmsg) -{ - int ret; - - if (!srv->sni_expr) - return 0; - - ret = server_parse_sni_expr(srv, proxy, errmsg); - if (!ret) - return 0; - - return ret; -} - /* Server initializations finalization. * Initialize health check, agent check, SNI expression and outgoing TLVs if enabled. * Must not be called for a default server instance. 
@@ -3471,9 +3571,27 @@ static int _srv_parse_finalize(char **args, int cur_arg, return ERR_ALERT | ERR_FATAL; } - if ((ret = _srv_parse_sni_expr_init(args, cur_arg, srv, px, &errmsg)) != 0) { + if ((ret = parse_srv_expr(srv->sni_expr, &srv->ssl_ctx.sni, px, &errmsg))) { if (errmsg) { - ha_alert("%s\n", errmsg); + ha_alert("error detected while parsing sni expression : %s.\n", errmsg); + free(errmsg); + } + return ret; + } + + /* Use sni as fallback if pool_conn_name isn't set */ + if (!srv->pool_conn_name && srv->sni_expr) { + srv->pool_conn_name = strdup(srv->sni_expr); + if (!srv->pool_conn_name) { + ha_alert("out of memory\n"); + return ERR_ALERT | ERR_FATAL; + } + } + + if ((ret = parse_srv_expr(srv->pool_conn_name, &srv->pool_conn_name_expr, + px, &errmsg))) { + if (errmsg) { + ha_alert("error detected while parsing pool-conn-name expression : %s.\n", errmsg); free(errmsg); } return ret; @@ -3490,7 +3608,7 @@ static int _srv_parse_finalize(char **args, int cur_arg, } list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) { - LIST_INIT(&srv_tlv->fmt); + lf_expr_init(&srv_tlv->fmt); if (srv_tlv->fmt_string && unlikely(!parse_logformat_string(srv_tlv->fmt_string, srv->proxy, &srv_tlv->fmt, 0, SMP_VAL_BE_SRV_CON, &errmsg))) { if (errmsg) { @@ -3562,8 +3680,13 @@ int parse_server(const char *file, int linenum, char **args, goto out; } - if (parse_flags & SRV_PARSE_TEMPLATE) + if (parse_flags & SRV_PARSE_TEMPLATE) { _srv_parse_tmpl_init(newsrv, curproxy); + } + else if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) { + newsrv->conf.name.key = newsrv->id; + ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); + } /* If the server id is fixed, insert it in the proxy used_id tree. * This is needed to detect a later duplicate id via srv_parse_id. @@ -3610,6 +3733,25 @@ struct server *server_find_by_id(struct proxy *bk, int id) return curserver; } +/* + * This function finds a server with matching "<puid> x <rid>" within + * selected backend <bk>. 
+ * Using the combination of proxy-uid + revision id ensures that the function + * will either return the server we're expecting or NULL if it has been removed + * from the proxy (<id> is unique within the list, but it is not true over the + * process lifetime as new servers may reuse the id of a previously deleted + * server). + */ +struct server *server_find_by_id_unique(struct proxy *bk, int id, uint32_t rid) +{ + struct server *curserver; + + curserver = server_find_by_id(bk, id); + if (!curserver || curserver->rid != rid) + return NULL; + return curserver; +} + /* Returns a pointer to the first server matching either name <name>, or id * if <name> starts with a '#'. NULL is returned if no match is found. * the lookup is performed in the backend <bk> @@ -3628,20 +3770,43 @@ struct server *server_find_by_name(struct proxy *bk, const char *name) curserver = NULL; if (*name == '#') { curserver = server_find_by_id(bk, atoi(name + 1)); - if (curserver) - return curserver; } else { - curserver = bk->srv; - - while (curserver && (strcmp(curserver->id, name) != 0)) - curserver = curserver->next; + struct ebpt_node *node; - if (curserver) - return curserver; + node = ebis_lookup(&bk->conf.used_server_name, name); + if (node) + curserver = container_of(node, struct server, conf.name); } - return NULL; + return curserver; +} + +/* + * This function finds a server with matching "<name> x <rid>" within + * selected backend <bk>. + * Using the combination of name + revision id ensures that the function + * will either return the server we're expecting or NULL if it has been removed + * from the proxy. For this we assume that <name> is unique within the list, + * which is the case in most setups, but in rare cases the user may have + * enforced duplicate server names in the initial config (ie: if he intends to + * use numerical IDs for identification instead). 
In this particular case, the + * function will not work as expected so server_find_by_id_unique() should be + * used to match a unique server instead. + * + * Just like server_find_by_id_unique(), if a server is deleted and a new server + * reuses the same name, the rid check will prevent the function from returning + * a different server from the one we were expecting to match against at a given + * time. + */ +struct server *server_find_by_name_unique(struct proxy *bk, const char *name, uint32_t rid) +{ + struct server *curserver; + + curserver = server_find_by_name(bk, name); + if (!curserver || curserver->rid != rid) + return NULL; + return curserver; } struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff) @@ -3705,101 +3870,332 @@ struct server *server_find_best_match(struct proxy *bk, char *name, int id, int return NULL; } -/* - * update a server's current IP address. - * ip is a pointer to the new IP address, whose address family is ip_sin_family. - * ip is in network format. - * updater is a string which contains an information about the requester of the update. - * updater is used if not NULL. +/* This functions retrieves server's addr and port to fill + * <inetaddr> struct passed as argument. * - * A log line and a stderr warning message is generated based on server's backend options. - * - * Must be called with the server lock held. + * This may only be used under inet context. 
*/ -int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater) +void server_get_inetaddr(struct server *s, struct server_inetaddr *inetaddr) { - union { - struct event_hdl_cb_data_server_inetaddr addr; - struct event_hdl_cb_data_server common; - } cb_data; - struct sockaddr_storage new_addr = { }; // shut up gcc warning + struct sockaddr_storage *addr = &s->addr; + unsigned int port = s->svc_port; + uint8_t mapports = !!(s->flags & SRV_F_MAPPORTS); - /* save the new IP family & address if necessary */ - switch (ip_sin_family) { - case AF_INET: - if (s->addr.ss_family == ip_sin_family && - !memcmp(ip, &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr, 4)) - return 0; - break; - case AF_INET6: - if (s->addr.ss_family == ip_sin_family && - !memcmp(ip, &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr, 16)) - return 0; - break; - }; + /* only INET families are supported */ + BUG_ON((addr->ss_family != AF_UNSPEC && + addr->ss_family != AF_INET && addr->ss_family != AF_INET6)); - /* generates a log line and a warning on stderr */ - if (1) { - /* book enough space for both IPv4 and IPv6 */ - char oldip[INET6_ADDRSTRLEN]; - char newip[INET6_ADDRSTRLEN]; + inetaddr->family = addr->ss_family; + memset(&inetaddr->addr, 0, sizeof(inetaddr->addr)); - memset(oldip, '\0', INET6_ADDRSTRLEN); - memset(newip, '\0', INET6_ADDRSTRLEN); + if (addr->ss_family == AF_INET) + inetaddr->addr.v4 = + ((struct sockaddr_in *)addr)->sin_addr; + else if (addr->ss_family == AF_INET6) + inetaddr->addr.v6 = + ((struct sockaddr_in6 *)addr)->sin6_addr; - /* copy old IP address in a string */ - switch (s->addr.ss_family) { - case AF_INET: - inet_ntop(s->addr.ss_family, &((struct sockaddr_in *)&s->addr)->sin_addr, oldip, INET_ADDRSTRLEN); + inetaddr->port.svc = port; + inetaddr->port.map = mapports; +} + +/* get human readable name for server_inetaddr_updater .by struct member + */ +const char *server_inetaddr_updater_by_to_str(enum server_inetaddr_updater_by by) +{ + 
switch (by) { + case SERVER_INETADDR_UPDATER_BY_CLI: + return "stats socket command"; + case SERVER_INETADDR_UPDATER_BY_LUA: + return "Lua script"; + case SERVER_INETADDR_UPDATER_BY_DNS_AR: + return "DNS additional record"; + case SERVER_INETADDR_UPDATER_BY_DNS_CACHE: + return "DNS cache"; + case SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER: + return "DNS resolver"; + default: + /* unknown, don't mention updater */ break; - case AF_INET6: - inet_ntop(s->addr.ss_family, &((struct sockaddr_in6 *)&s->addr)->sin6_addr, oldip, INET6_ADDRSTRLEN); + } + return NULL; +} + +/* append inetaddr updater info to chunk <out> + */ +static void _srv_append_inetaddr_updater_info(struct buffer *out, + struct server *s, + struct server_inetaddr_updater updater) +{ + switch (updater.by) { + case SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER: + /* we need to report the resolver/nameserver id which is + * responsible for the update + */ + { + struct resolvers *r = s->resolvers; + struct dns_nameserver *ns; + + /* we already know that the update comes from the + * resolver section linked to the server, but we + * need to find out which nameserver handled the dns + * query + */ + BUG_ON(!r); + ns = find_nameserver_by_resolvers_and_id(r, updater.u.dns_resolver.ns_id); + BUG_ON(!ns); + chunk_appendf(out, " by '%s/%s'", r->id, ns->id); + } break; default: - strlcpy2(oldip, "(none)", sizeof(oldip)); + { + const char *by_name; + + by_name = server_inetaddr_updater_by_to_str(updater.by); + if (by_name) + chunk_appendf(out, " by '%s'", by_name); + } break; - }; + } +} - /* copy new IP address in a string */ - switch (ip_sin_family) { +/* server_set_inetaddr() helper */ +static void _addr_to_str(int family, const void *addr, char *addr_str, size_t len) +{ + memset(addr_str, 0, len); + switch (family) { case AF_INET: - inet_ntop(ip_sin_family, ip, newip, INET_ADDRSTRLEN); - break; case AF_INET6: - inet_ntop(ip_sin_family, ip, newip, INET6_ADDRSTRLEN); + inet_ntop(family, addr, addr_str, len); break; - }; + 
default: + strlcpy2(addr_str, "(none)", len); + break; + } +} +/* server_set_inetaddr() helper */ +static int _inetaddr_addr_cmp(const struct server_inetaddr *inetaddr, const struct sockaddr_storage *addr) +{ + struct in_addr *v4; + struct in6_addr *v6; + + if (inetaddr->family != addr->ss_family) + return 1; + + if (inetaddr->family == AF_INET) { + v4 = &((struct sockaddr_in *)addr)->sin_addr; + if (memcmp(&inetaddr->addr.v4, v4, sizeof(struct in_addr))) + return 1; + } + else if (inetaddr->family == AF_INET6) { + v6 = &((struct sockaddr_in6 *)addr)->sin6_addr; + if (memcmp(&inetaddr->addr.v6, v6, sizeof(struct in6_addr))) + return 1; + } + + return 0; // both inetaddr storage are equivalent +} + +/* This function sets a server's addr and port in inet context based on new + * inetaddr input + * + * The function first does the following, in that order: + * - checks if an update is required (new IP or port is different than current + * one) + * - check the update is allowed: + * - allow all changes if no CHECKS are configured + * - if CHECK is configured: + * - if switch to port map (SRV_F_MAPPORTS), ensure health check have their + * own ports + * - applies required changes to both ADDR and PORT if both 'required' and + * 'allowed' conditions are met. + * + * Caller can pass <msg> buffer so that it gets some information about the + * operation. It may as well provide <updater> so that messages mention that + * the update was performed on the behalf of it. + * + * <inetaddr> family may be set to UNSPEC to reset server's addr + * + * Caller must set <inetaddr>->port.map to 1 if <inetaddr>->port.svc must be + * handled as an offset + * + * The function returns 1 if an update was performed and 0 if nothing was + * changed. 
+ */ +int server_set_inetaddr(struct server *s, + const struct server_inetaddr *inetaddr, + struct server_inetaddr_updater updater, struct buffer *msg) +{ + union { + struct event_hdl_cb_data_server_inetaddr addr; + struct event_hdl_cb_data_server common; + } cb_data; + char addr_str[INET6_ADDRSTRLEN]; + uint16_t current_port; + uint8_t ip_change = 0; + uint8_t port_change = 0; + int ret = 0; + + /* only INET families are supported */ + BUG_ON((inetaddr->family != AF_UNSPEC && + inetaddr->family != AF_INET && inetaddr->family != AF_INET6) || + (s->addr.ss_family != AF_UNSPEC && + s->addr.ss_family != AF_INET && s->addr.ss_family != AF_INET6)); + + /* ignore if no change */ + if (!_inetaddr_addr_cmp(inetaddr, &s->addr)) + goto port; + + ip_change = 1; + + /* update report for caller */ + if (msg) { + void *from_ptr = NULL; + + if (s->addr.ss_family == AF_INET) + from_ptr = &((struct sockaddr_in *)&s->addr)->sin_addr; + else if (s->addr.ss_family == AF_INET6) + from_ptr = &((struct sockaddr_in6 *)&s->addr)->sin6_addr; - /* save log line into a buffer */ - chunk_printf(&trash, "%s/%s changed its IP from %s to %s by %s", - s->proxy->id, s->id, oldip, newip, updater); + _addr_to_str(s->addr.ss_family, from_ptr, addr_str, sizeof(addr_str)); + chunk_printf(msg, "IP changed from '%s'", addr_str); + _addr_to_str(inetaddr->family, &inetaddr->addr, addr_str, sizeof(addr_str)); + chunk_appendf(msg, " to '%s'", addr_str); + } + + if (inetaddr->family == AF_UNSPEC) + goto out; // ignore port information when unsetting addr + + port: + /* collection data currently setup */ + current_port = s->svc_port; + + /* check if caller triggers a port mapped or offset */ + if (inetaddr->port.map) { + /* check if server currently uses port map */ + if (!(s->flags & SRV_F_MAPPORTS)) { + /* we're switching from a fixed port to a SRV_F_MAPPORTS + * (mapped) port, prevent PORT change if check is enabled + * and it doesn't have it's dedicated port while switching + * to port mapping + */ + if 
((s->check.state & CHK_ST_ENABLED) && !s->check.port) { + if (msg) { + if (ip_change) + chunk_appendf(msg, ", "); + chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive)."); + } + goto out; + } + /* switch from fixed port to port map mandatorily triggers + * a port change + */ + port_change = 1; + } + /* else we're already using port maps */ + else { + port_change = current_port != inetaddr->port.svc; + } + } + /* fixed port */ + else { + if ((s->flags & SRV_F_MAPPORTS)) + port_change = 1; // changing from mapped to fixed + else + port_change = current_port != inetaddr->port.svc; + } + + /* update response message about PORT change */ + if (port_change && msg) { + if (ip_change) + chunk_appendf(msg, ", "); + + chunk_appendf(msg, "port changed from '"); + if (s->flags & SRV_F_MAPPORTS) + chunk_appendf(msg, "+"); + + chunk_appendf(msg, "%d' to '", s->svc_port); + if (inetaddr->port.map) + chunk_appendf(msg, "+"); + chunk_appendf(msg, "%d'", inetaddr->port.svc); + } + + out: + if (ip_change || port_change) { + _srv_event_hdl_prepare(&cb_data.common, s, 0); + _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, + inetaddr, + updater); + + /* server_atomic_sync_task will apply the changes for us */ + _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); + + ret = 1; + } + + if (ret && msg && updater.by != SERVER_INETADDR_UPDATER_BY_NONE) + _srv_append_inetaddr_updater_info(msg, s, updater); + return ret; +} + +/* Sets new server's addr and/or svc_port, then send a log and report a + * warning on stderr if something has changed. + * + * Returns 1 if something has changed, 0 otherwise. + * see server_set_inetaddr() for more information. 
+ */ +int server_set_inetaddr_warn(struct server *s, + const struct server_inetaddr *inetaddr, + struct server_inetaddr_updater updater) +{ + struct buffer *msg = get_trash_chunk(); + int ret; + + chunk_reset(msg); + + ret = server_set_inetaddr(s, inetaddr, updater, msg); + if (msg->data) { /* write the buffer on stderr */ - ha_warning("%s.\n", trash.area); + ha_warning("%s/%s: %s.\n", s->proxy->id, s->id, msg->area); /* send a log */ - send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area); + send_log(s->proxy, LOG_NOTICE, "%s/%s: %s.\n", s->proxy->id, s->id, msg->area); } + return ret; +} + +/* + * update a server's current IP address. + * ip is a pointer to the new IP address, whose address family is ip_sin_family. + * ip is in network format. + * updater is a string which contains an information about the requester of the update. + * updater is used if not NULL. + * + * A log line and a stderr warning message is generated based on server's backend options. + * + * Must be called with the server lock held. 
+ */ +int srv_update_addr(struct server *s, void *ip, int ip_sin_family, struct server_inetaddr_updater updater) +{ + struct server_inetaddr inetaddr; + + server_get_inetaddr(s, &inetaddr); + BUG_ON(ip_sin_family != AF_INET && ip_sin_family != AF_INET6); /* save the new IP family */ - new_addr.ss_family = ip_sin_family; + inetaddr.family = ip_sin_family; /* save the new IP address */ switch (ip_sin_family) { case AF_INET: - memcpy(&((struct sockaddr_in *)&new_addr)->sin_addr.s_addr, ip, 4); + memcpy(&inetaddr.addr.v4, ip, 4); break; case AF_INET6: - memcpy(((struct sockaddr_in6 *)&new_addr)->sin6_addr.s6_addr, ip, 16); + memcpy(&inetaddr.addr.v6, ip, 16); break; }; - _srv_event_hdl_prepare(&cb_data.common, s, 0); - _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, - &new_addr, s->svc_port, !!(s->flags & SRV_F_MAPPORTS), - 0); - - /* server_atomic_sync_task will apply the changes for us */ - _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); + server_set_inetaddr_warn(s, &inetaddr, updater); return 0; } @@ -3906,40 +4302,37 @@ out: /* * This function update a server's addr and port only for AF_INET and AF_INET6 families. * - * Caller can pass its name through <updater> to get it integrated in the response message - * returned by the function. + * Caller can pass its info through <updater> to get it integrated in the response + * message returned by the function. 
* * The function first does the following, in that order: + * - checks that don't switch from/to a family other than AF_INET and AF_INET6 * - validates the new addr and/or port - * - checks if an update is required (new IP or port is different than current ones) - * - checks the update is allowed: - * - don't switch from/to a family other than AF_INET4 and AF_INET6 - * - allow all changes if no CHECKS are configured - * - if CHECK is configured: - * - if switch to port map (SRV_F_MAPPORTS), ensure health check have their own ports - * - applies required changes to both ADDR and PORT if both 'required' and 'allowed' - * conditions are met + * - calls server_set_inetaddr() to check and apply the change * * Must be called with the server lock held. */ -const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater) +const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, + struct server_inetaddr_updater updater) { - union { - struct event_hdl_cb_data_server_inetaddr addr; - struct event_hdl_cb_data_server common; - } cb_data; struct sockaddr_storage sa; - int ret; - char current_addr[INET6_ADDRSTRLEN]; - uint16_t current_port, new_port = 0; + struct server_inetaddr inetaddr; struct buffer *msg; - int ip_change = 0; - int port_change = 0; - uint8_t mapports = !!(s->flags & SRV_F_MAPPORTS); + int ret; msg = get_trash_chunk(); chunk_reset(msg); + /* even a simple port change is not supported outside of inet context, because + * s->svc_port is only relevant under inet context + */ + if ((s->addr.ss_family != AF_INET) && (s->addr.ss_family != AF_INET6)) { + chunk_printf(msg, "Update for the current server address family is only supported through configuration file."); + goto out; + } + + server_get_inetaddr(s, &inetaddr); + if (addr) { memset(&sa, 0, sizeof(struct sockaddr_storage)); if (str2ip2(addr, &sa, 0) == NULL) { @@ -3953,40 +4346,24 @@ const char *srv_update_addr_port(struct server *s, const char 
*addr, const char goto out; } - /* collecting data currently setup */ - memset(current_addr, '\0', sizeof(current_addr)); - ret = addr_to_str(&s->addr, current_addr, sizeof(current_addr)); - /* changes are allowed on AF_INET* families only */ - if ((ret != AF_INET) && (ret != AF_INET6)) { - chunk_printf(msg, "Update for the current server address family is only supported through configuration file"); - goto out; - } - - /* applying ADDR changes if required and allowed - * ipcmp returns 0 when both ADDR are the same - */ - if (ipcmp(&s->addr, &sa, 0) == 0) { - chunk_appendf(msg, "no need to change the addr"); - goto port; + inetaddr.family = sa.ss_family; + switch (inetaddr.family) { + case AF_INET: + inetaddr.addr.v4 = ((struct sockaddr_in *)&sa)->sin_addr; + break; + case AF_INET6: + inetaddr.addr.v6 = ((struct sockaddr_in6 *)&sa)->sin6_addr; + break; } - ip_change = 1; - - /* update report for caller */ - chunk_printf(msg, "IP changed from '%s' to '%s'", current_addr, addr); } - port: if (port) { + uint16_t new_port; char sign = '\0'; char *endptr; - if (addr) - chunk_appendf(msg, ", "); - - /* collecting data currently setup */ - current_port = s->svc_port; - sign = *port; + errno = 0; new_port = strtol(port, &endptr, 10); if ((errno != 0) || (port == endptr)) { @@ -3995,98 +4372,46 @@ const char *srv_update_addr_port(struct server *s, const char *addr, const char } /* check if caller triggers a port mapped or offset */ - if (sign == '-' || (sign == '+')) { - /* check if server currently uses port map */ - if (!(s->flags & SRV_F_MAPPORTS)) { - /* check is configured - * we're switching from a fixed port to a SRV_F_MAPPORTS (mapped) port - * prevent PORT change if check doesn't have it's dedicated port while switching - * to port mapping */ - if (!s->check.port) { - chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive."); - goto out; - } - /* 
switch from fixed port to port map mandatorily triggers - * a port change */ - port_change = 1; - } - /* we're already using port maps */ - else { - port_change = current_port != new_port; - } - } - /* fixed port */ - else { - port_change = current_port != new_port; - } - - /* applying PORT changes if required and update response message */ - if (port_change) { - uint16_t new_port_print = new_port; - - /* prepare message */ - chunk_appendf(msg, "port changed from '"); - if (s->flags & SRV_F_MAPPORTS) - chunk_appendf(msg, "+"); - chunk_appendf(msg, "%d' to '", current_port); - - if (sign == '-') { - mapports = 1; - chunk_appendf(msg, "%c", sign); - /* just use for result output */ - new_port_print = -new_port_print; - } - else if (sign == '+') { - mapports = 1; - chunk_appendf(msg, "%c", sign); - } - else { - mapports = 0; - } - - chunk_appendf(msg, "%d'", new_port_print); - } - else { - chunk_appendf(msg, "no need to change the port"); - } + if (sign == '-' || sign == '+') + inetaddr.port.map = 1; + else + inetaddr.port.map = 0; + + inetaddr.port.svc = new_port; + + /* note: negative offset was converted to positive offset + * (new_port is unsigned) to prevent later conversions errors + * since svc_port is handled as an unsigned int all along the + * chain. Unfortunately this is a one-way operation so the user + * could be surprised to see a negative offset reported using + * its equivalent positive offset in the generated message + * (-X = +(65535 - (X-1))), but thanks to proper wraparound it + * will be interpreted as a negative offset during port + * remapping so it will work as expected. + */ } -out: - if (ip_change || port_change) { - _srv_event_hdl_prepare(&cb_data.common, s, 0); - _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, - ((ip_change) ? &sa : &s->addr), - ((port_change) ? 
new_port : s->svc_port), mapports, - 1); + ret = server_set_inetaddr(s, &inetaddr, updater, msg); + if (!ret) + chunk_printf(msg, "nothing changed"); - /* server_atomic_sync_task will apply the changes for us */ - _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); - } - if (updater) - chunk_appendf(msg, " by '%s'", updater); - chunk_appendf(msg, "\n"); + out: return msg->area; } /* - * update server status based on result of SRV resolution + * put the server in maintenance because of failing SRV resolution * returns: - * 0 if server status is updated + * 0 if server was put under maintenance * 1 if server status has not changed * * Must be called with the server lock held. */ -int srvrq_update_srv_status(struct server *s, int has_no_ip) +int srvrq_set_srv_down(struct server *s) { if (!s->srvrq) return 1; - /* since this server has an IP, it can go back in production */ - if (has_no_ip == 0) { - srv_clr_admin_flag(s, SRV_ADMF_RMAINT); - return 1; - } - if (s->next_admin & SRV_ADMF_RMAINT) return 1; @@ -4095,59 +4420,46 @@ int srvrq_update_srv_status(struct server *s, int has_no_ip) } /* - * update server status based on result of name resolution + * put server under maintenance as a result of name resolution * returns: - * 0 if server status is updated + * 0 if server was put under maintenance * 1 if server status has not changed * * Must be called with the server lock held. */ -int snr_update_srv_status(struct server *s, int has_no_ip) +int snr_set_srv_down(struct server *s) { struct resolvers *resolvers = s->resolvers; struct resolv_resolution *resolution = (s->resolv_requester ? 
s->resolv_requester->resolution : NULL); int exp; + /* server already under maintenance */ + if (s->next_admin & SRV_ADMF_RMAINT) + goto out; + /* If resolution is NULL we're dealing with SRV records Additional records */ if (resolution == NULL) - return srvrq_update_srv_status(s, has_no_ip); + return srvrq_set_srv_down(s); switch (resolution->status) { case RSLV_STATUS_NONE: /* status when HAProxy has just (re)started. * Nothing to do, since the task is already automatically started */ - break; + goto out; case RSLV_STATUS_VALID: /* - * resume health checks - * server will be turned back on if health check is safe + * valid resolution but no usable server address */ - if (has_no_ip) { - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; - srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP); - return 0; - } - - if (!(s->next_admin & SRV_ADMF_RMAINT)) - return 1; - srv_clr_admin_flag(s, SRV_ADMF_RMAINT); - chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer", - s->proxy->id, s->id); - - ha_warning("%s.\n", trash.area); - send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area); + srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP); return 0; case RSLV_STATUS_NX: /* stop server if resolution is NX for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.nx); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NX); return 0; @@ -4155,10 +4467,8 @@ int snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution is TIMEOUT for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.timeout); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_TIMEOUT); return 0; @@ -4166,10 +4476,8 @@ int 
snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution is REFUSED for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.refused); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_REFUSED); return 0; @@ -4177,14 +4485,13 @@ int snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution failed for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.other); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_UNSPEC); return 0; } + out: return 1; } @@ -4210,7 +4517,6 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c void *serverip, *firstip; short server_sin_family, firstip_sin_family; int ret; - struct buffer *chk = get_trash_chunk(); int has_no_ip = 0; s = objt_server(requester->owner); @@ -4269,12 +4575,6 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c has_no_ip = 1; goto update_status; - case RSLV_UPD_NAME_ERROR: - /* update resolution status to OTHER error type */ - resolution->status = RSLV_STATUS_OTHER; - has_no_ip = 1; - goto update_status; - default: has_no_ip = 1; goto invalid; @@ -4285,15 +4585,21 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c if (counters) { counters->app.resolver.update++; /* save the first ip we found */ - chunk_printf(chk, "%s/%s", counters->pid, counters->id); + srv_update_addr(s, firstip, firstip_sin_family, + SERVER_INETADDR_UPDATER_DNS_RESOLVER(counters->ns_puid)); } else - chunk_printf(chk, "DNS cache"); - srv_update_addr(s, firstip, firstip_sin_family, (char *) chk->area); + srv_update_addr(s, firstip, firstip_sin_family, SERVER_INETADDR_UPDATER_DNS_CACHE); 
update_status: - if (!snr_update_srv_status(s, has_no_ip) && has_no_ip) - memset(&s->addr, 0, sizeof(s->addr)); + if (has_no_ip && !snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + } return 1; invalid: @@ -4301,8 +4607,14 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c counters->app.resolver.invalid++; goto update_status; } - if (!snr_update_srv_status(s, has_no_ip) && has_no_ip) - memset(&s->addr, 0, sizeof(s->addr)); + if (has_no_ip && !snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + } return 0; } @@ -4382,8 +4694,13 @@ int snr_resolution_error_cb(struct resolv_requester *requester, int error_code) return 0; HA_SPIN_LOCK(SERVER_LOCK, &s->lock); - if (!snr_update_srv_status(s, 1)) { - memset(&s->addr, 0, sizeof(s->addr)); + if (!snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock); resolv_detach_from_resolution_answer_items(requester->resolution, requester); return 0; @@ -4739,16 +5056,16 @@ struct server *cli_find_server(struct appctx *appctx, char *arg) be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) { - cli_err(appctx, "Require 'backend/server'."); + cli_err(appctx, "Require 'backend/server'.\n"); return NULL; } if (!(px = proxy_be_by_name(ist0(be_name)))) { - cli_err(appctx, "No such backend."); + cli_err(appctx, "No such backend.\n"); return NULL; } if 
(!(sv = server_find_by_name(px, ist0(sv_name)))) { - cli_err(appctx, "No such server."); + cli_err(appctx, "No such server.\n"); return NULL; } @@ -4915,10 +5232,9 @@ static int cli_parse_set_server(char **args, char *payload, struct appctx *appct port = args[6]; } HA_SPIN_LOCK(SERVER_LOCK, &sv->lock); - warning = srv_update_addr_port(sv, addr, port, "stats socket command"); + warning = srv_update_addr_port(sv, addr, port, SERVER_INETADDR_UPDATER_CLI); if (warning) cli_msg(appctx, LOG_WARNING, warning); - srv_clr_admin_flag(sv, SRV_ADMF_RMAINT); HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock); } else if (strcmp(args[3], "fqdn") == 0) { @@ -4994,12 +5310,12 @@ static int cli_parse_get_weight(char **args, char *payload, struct appctx *appct be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) - return cli_err(appctx, "Require 'backend/server'."); + return cli_err(appctx, "Require 'backend/server'.\n"); if (!(be = proxy_be_by_name(ist0(be_name)))) - return cli_err(appctx, "No such backend."); + return cli_err(appctx, "No such backend.\n"); if (!(sv = server_find_by_name(be, ist0(sv_name)))) - return cli_err(appctx, "No such server."); + return cli_err(appctx, "No such server.\n"); /* return server's effective weight at the moment */ snprintf(trash.area, trash.size, "%d (initial %d)\n", sv->uweight, @@ -5234,7 +5550,7 @@ static int srv_alloc_lb(struct server *sv, struct proxy *be) /* updates the server's weight during a warmup stage. Once the final weight is * reached, the task automatically stops. Note that any server status change - * must have updated s->last_change accordingly. + * must have updated s->counters.last_change accordingly. 
*/ static struct task *server_warmup(struct task *t, void *context, unsigned int state) { @@ -5290,7 +5606,7 @@ static int init_srv_slowstart(struct server *srv) if (srv->next_state == SRV_ST_STARTING) { task_schedule(srv->warmup, tick_add(now_ms, - MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20))); + MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->counters.last_change)) / 20))); } } @@ -5352,19 +5668,19 @@ static int cli_parse_add_server(char **args, char *payload, struct appctx *appct } if (!*sv_name) - return cli_err(appctx, "Require 'backend/server'."); + return cli_err(appctx, "Require 'backend/server'.\n"); be = proxy_be_by_name(be_name); if (!be) - return cli_err(appctx, "No such backend."); + return cli_err(appctx, "No such backend.\n"); if (!(be->lbprm.algo & BE_LB_PROP_DYN)) { - cli_err(appctx, "Backend must use a dynamic load balancing to support dynamic servers."); + cli_err(appctx, "Backend must use a dynamic load balancing to support dynamic servers.\n"); return 1; } if (be->mode == PR_MODE_SYSLOG) { - cli_err(appctx," Dynamic servers cannot be used with log backends."); + cli_err(appctx," Dynamic servers cannot be used with log backends.\n"); return 1; } @@ -5554,11 +5870,11 @@ static int cli_parse_add_server(char **args, char *payload, struct appctx *appct */ if (srv->check.state & CHK_ST_CONFIGURED) { if (!start_check_task(&srv->check, 0, 1, 1)) - ha_alert("System might be unstable, consider to execute a reload"); + ha_alert("System might be unstable, consider to execute a reload\n"); } if (srv->agent.state & CHK_ST_CONFIGURED) { if (!start_check_task(&srv->agent, 0, 1, 1)) - ha_alert("System might be unstable, consider to execute a reload"); + ha_alert("System might be unstable, consider to execute a reload\n"); } if (srv->cklen && be->mode != PR_MODE_HTTP) @@ -5594,6 +5910,72 @@ out: return 1; } +/* Check if the server <bename>/<svname> exists and is ready for being deleted. 
+ * Both <bename> and <svname> must be valid strings. This must be called under + * thread isolation. If pb/ps are not null, upon success, the pointer to + * the backend and server respectively will be put there. If pm is not null, + * a pointer to an error/success message is returned there (possibly NULL if + * nothing to say). Returned values: + * >0 if OK + * 0 if not yet (should wait if it can) + * <0 if not possible + */ +int srv_check_for_deletion(const char *bename, const char *svname, struct proxy **pb, struct server **ps, const char **pm) +{ + struct server *srv = NULL; + struct proxy *be = NULL; + const char *msg = NULL; + int ret; + + /* First, unrecoverable errors */ + ret = -1; + + if (!(be = proxy_be_by_name(bename))) { + msg = "No such backend."; + goto leave; + } + + if (!(srv = server_find_by_name(be, svname))) { + msg = "No such server."; + goto leave; + } + + if (srv->flags & SRV_F_NON_PURGEABLE) { + msg = "This server cannot be removed at runtime due to other configuration elements pointing to it."; + goto leave; + } + + /* Only servers in maintenance can be deleted. This ensures that the + * server is not present anymore in the lb structures (through + * lbprm.set_server_status_down). + */ + if (!(srv->cur_admin & SRV_ADMF_MAINT)) { + msg = "Only servers in maintenance mode can be deleted."; + goto leave; + } + + /* Second, conditions that may change over time */ + ret = 0; + + /* Ensure that there is no active/pending connection on the server. */ + if (srv->curr_used_conns || + !eb_is_empty(&srv->queue.head) || srv_has_streams(srv)) { + msg = "Server still has connections attached to it, cannot remove it."; + goto leave; + } + + /* OK, let's go */ + ret = 1; +leave: + if (pb) + *pb = be; + if (ps) + *ps = srv; + if (pm) + *pm = msg; + return ret; +} + /* Parse a "del server" command * Returns 0 if the server has been successfully initialized, 1 on failure. 
*/ @@ -5603,6 +5985,10 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap struct server *srv; struct server *prev_del; struct ist be_name, sv_name; + struct mt_list *elt1, elt2; + struct sess_priv_conns *sess_conns = NULL; + const char *msg; + int ret, i; if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) return 1; @@ -5620,42 +6006,71 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap sv_name = ist(args[1]); be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) { - cli_err(appctx, "Require 'backend/server'."); + cli_err(appctx, "Require 'backend/server'.\n"); goto out; } - if (!(be = proxy_be_by_name(ist0(be_name)))) { - cli_err(appctx, "No such backend."); - goto out; - } - if (!(srv = server_find_by_name(be, ist0(sv_name)))) { - cli_err(appctx, "No such server."); + ret = srv_check_for_deletion(ist0(be_name), ist0(sv_name), &be, &srv, &msg); + if (ret <= 0) { + /* failure (recoverable or not) */ + cli_err(appctx, msg); goto out; } - if (srv->flags & SRV_F_NON_PURGEABLE) { - cli_err(appctx, "This server cannot be removed at runtime due to other configuration elements pointing to it."); - goto out; - } + /* Close idle connections attached to this server. */ + for (i = tid;;) { + struct list *list = &srv->per_thr[i].idle_conn_list; + struct connection *conn; + + while (!LIST_ISEMPTY(list)) { + conn = LIST_ELEM(list->n, struct connection *, idle_list); + if (i != tid) { + if (conn->mux && conn->mux->takeover) + conn->mux->takeover(conn, i, 1); + else if (conn->xprt && conn->xprt->takeover) + conn->xprt->takeover(conn, conn->ctx, i, 1); + } + conn_release(conn); + } - /* Only servers in maintenance can be deleted. This ensures that the - * server is not present anymore in the lb structures (through - * lbprm.set_server_status_down). 
- */ - if (!(srv->cur_admin & SRV_ADMF_MAINT)) { - cli_err(appctx, "Only servers in maintenance mode can be deleted."); - goto out; + /* Also remove all purgeable conns as some of them may still + * reference the currently deleted server. + */ + while ((conn = MT_LIST_POP(&idle_conns[i].toremove_conns, + struct connection *, toremove_list))) { + conn_release(conn); + } + + if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid) + break; } - /* Ensure that there is no active/idle/pending connection on the server. - * - * TODO idle connections should not prevent server deletion. A proper - * cleanup function should be implemented to be used here. - */ - if (srv->curr_used_conns || srv->curr_idle_conns || - !eb_is_empty(&srv->queue.head) || srv_has_streams(srv)) { - cli_err(appctx, "Server still has connections attached to it, cannot remove it."); - goto out; + /* All idle connections should be removed now. */ + BUG_ON(srv->curr_idle_conns); + + /* Close idle private connections attached to this server. */ + mt_list_for_each_entry_safe(sess_conns, &srv->sess_conns, srv_el, elt1, elt2) { + struct connection *conn, *conn_back; + list_for_each_entry_safe(conn, conn_back, &sess_conns->conn_list, sess_el) { + + /* Only idle connections should be present if srv_check_for_deletion() is true. 
*/ + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + + LIST_DEL_INIT(&conn->sess_el); + conn->owner = NULL; + conn->flags &= ~CO_FL_SESS_IDLE; + if (sess_conns->tid != tid) { + if (conn->mux && conn->mux->takeover) + conn->mux->takeover(conn, sess_conns->tid, 1); + else if (conn->xprt && conn->xprt->takeover) + conn->xprt->takeover(conn, conn->ctx, sess_conns->tid, 1); + } + conn_release(conn); + } + + LIST_DELETE(&sess_conns->sess_el); + MT_LIST_DELETE_SAFE(elt1); + pool_free(pool_head_sess_priv_conns, sess_conns); } /* removing cannot fail anymore when we reach this: @@ -5724,13 +6139,11 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap ha_notice("Server deleted.\n"); srv_drop(srv); - cli_msg(appctx, LOG_INFO, "Server deleted."); - + cli_msg(appctx, LOG_INFO, "Server deleted.\n"); return 0; out: thread_release(); - return 1; } @@ -6334,8 +6747,8 @@ static void srv_update_status(struct server *s, int type, int cause) if (srv_prev_state != s->cur_state) { if (srv_prev_state == SRV_ST_STOPPED) { /* server was down and no longer is */ - if (s->last_change < ns_to_sec(now_ns)) // ignore negative times - s->down_time += ns_to_sec(now_ns) - s->last_change; + if (s->counters.last_change < ns_to_sec(now_ns)) // ignore negative times + s->down_time += ns_to_sec(now_ns) - s->counters.last_change; _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s); } else if (s->cur_state == SRV_ST_STOPPED) { @@ -6343,7 +6756,7 @@ static void srv_update_status(struct server *s, int type, int cause) s->counters.down_trans++; _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DOWN, cb_data.common, s); } - s->last_change = ns_to_sec(now_ns); + s->counters.last_change = ns_to_sec(now_ns); /* publish the state change */ _srv_event_hdl_prepare_state(&cb_data.state, @@ -6358,9 +6771,9 @@ static void srv_update_status(struct server *s, int type, int cause) /* backend was down and is back up again: * no helper function, updating last_change and backend downtime 
stats */ - if (s->proxy->last_change < ns_to_sec(now_ns)) // ignore negative times - s->proxy->down_time += ns_to_sec(now_ns) - s->proxy->last_change; - s->proxy->last_change = ns_to_sec(now_ns); + if (s->proxy->be_counters.last_change < ns_to_sec(now_ns)) // ignore negative times + s->proxy->down_time += ns_to_sec(now_ns) - s->proxy->be_counters.last_change; + s->proxy->be_counters.last_change = ns_to_sec(now_ns); } } diff --git a/src/server_state.c b/src/server_state.c index ebdcf3c..ffc2463 100644 --- a/src/server_state.c +++ b/src/server_state.c @@ -321,7 +321,7 @@ static void srv_state_srv_update(struct server *srv, int version, char **params) srv_adm_set_drain(srv); } - srv->last_change = ns_to_sec(now_ns) - srv_last_time_change; + srv->counters.last_change = ns_to_sec(now_ns) - srv_last_time_change; srv->check.status = srv_check_status; srv->check.result = srv_check_result; diff --git a/src/session.c b/src/session.c index ce9ccbf..f8953df 100644 --- a/src/session.c +++ b/src/session.c @@ -27,8 +27,8 @@ DECLARE_POOL(pool_head_session, "session", sizeof(struct session)); -DECLARE_POOL(pool_head_sess_srv_list, "session server list", - sizeof(struct sess_srv_list)); +DECLARE_POOL(pool_head_sess_priv_conns, "session priv conns list", + sizeof(struct sess_priv_conns)); int conn_complete_session(struct connection *conn); @@ -61,7 +61,7 @@ struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type sess->t_idle = -1; _HA_ATOMIC_INC(&totalconn); _HA_ATOMIC_INC(&jobs); - LIST_INIT(&sess->srv_list); + LIST_INIT(&sess->priv_conns); sess->idle_conns = 0; sess->flags = SESS_FL_NONE; sess->src = NULL; @@ -76,33 +76,29 @@ struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type void session_free(struct session *sess) { struct connection *conn, *conn_back; - struct sess_srv_list *srv_list, *srv_list_back; + struct sess_priv_conns *pconns, *pconns_back; - if (sess->listener) + if (sess->flags & SESS_FL_RELEASE_LI) { + /* listener 
must be set for session used to account FE conns. */ + BUG_ON(!sess->listener); listener_release(sess->listener); + } + session_store_counters(sess); pool_free(pool_head_stk_ctr, sess->stkctr); vars_prune_per_sess(&sess->vars); conn = objt_conn(sess->origin); if (conn != NULL && conn->mux) conn->mux->destroy(conn->ctx); - list_for_each_entry_safe(srv_list, srv_list_back, &sess->srv_list, srv_list) { - list_for_each_entry_safe(conn, conn_back, &srv_list->conn_list, session_list) { - LIST_DEL_INIT(&conn->session_list); - if (conn->mux) { - conn->owner = NULL; - conn->flags &= ~CO_FL_SESS_IDLE; - conn->mux->destroy(conn->ctx); - } else { - /* We have a connection, but not yet an associated mux. - * So destroy it now. - */ - conn_stop_tracking(conn); - conn_full_close(conn); - conn_free(conn); - } + list_for_each_entry_safe(pconns, pconns_back, &sess->priv_conns, sess_el) { + list_for_each_entry_safe(conn, conn_back, &pconns->conn_list, sess_el) { + LIST_DEL_INIT(&conn->sess_el); + conn->owner = NULL; + conn->flags &= ~CO_FL_SESS_IDLE; + conn_release(conn); } - pool_free(pool_head_sess_srv_list, srv_list); + MT_LIST_DELETE(&pconns->srv_el); + pool_free(pool_head_sess_priv_conns, pconns); } sockaddr_free(&sess->src); sockaddr_free(&sess->dst); @@ -190,11 +186,17 @@ int session_accept_fd(struct connection *cli_conn) } } - sess = session_new(p, l, &cli_conn->obj_type); - if (!sess) - goto out_free_conn; + /* Reversed conns already have an assigned session, do not recreate it. */ + if (!(cli_conn->flags & CO_FL_REVERSED)) { + sess = session_new(p, l, &cli_conn->obj_type); + if (!sess) + goto out_free_conn; - conn_set_owner(cli_conn, sess, NULL); + conn_set_owner(cli_conn, sess, NULL); + } + else { + sess = cli_conn->owner; + } /* now evaluate the tcp-request layer4 rules. We only need a session * and no stream for these rules. 
@@ -293,12 +295,19 @@ int session_accept_fd(struct connection *cli_conn) sess->task->process = session_expire_embryonic; sess->task->expire = tick_add_ifset(now_ms, timeout); task_queue(sess->task); + + /* Session is responsible to decrement listener conns counters. */ + sess->flags |= SESS_FL_RELEASE_LI; + return 1; } /* OK let's complete stream initialization since there is no handshake */ - if (conn_complete_session(cli_conn) >= 0) + if (conn_complete_session(cli_conn) >= 0) { + /* Session is responsible to decrement listener conns counters. */ + sess->flags |= SESS_FL_RELEASE_LI; return 1; + } /* if we reach here we have deliberately decided not to keep this * session (e.g. tcp-request rule), so that's not an error we should @@ -308,9 +317,9 @@ int session_accept_fd(struct connection *cli_conn) /* error unrolling */ out_free_sess: - /* prevent call to listener_release during session_free. It will be - * done below, for all errors. */ - sess->listener = NULL; + /* SESS_FL_RELEASE_LI must not be set here as listener_release() is + * called manually for all errors. + */ session_free(sess); out_free_conn: @@ -322,15 +331,8 @@ int session_accept_fd(struct connection *cli_conn) MSG_DONTWAIT|MSG_NOSIGNAL); } - if (cli_conn->mux) { - /* Mux is already initialized for active reversed connection. */ - cli_conn->mux->destroy(cli_conn->ctx); - } - else { - conn_stop_tracking(cli_conn); - conn_full_close(cli_conn); - conn_free(cli_conn); - } + /* Mux is already initialized for active reversed connection. */ + conn_release(cli_conn); listener_release(l); return ret; } @@ -443,7 +445,7 @@ static void session_kill_embryonic(struct session *sess, unsigned int state) conn->err_code = CO_ER_SSL_TIMEOUT; } - if(!LIST_ISEMPTY(&sess->fe->logformat_error)) { + if(!lf_expr_isempty(&sess->fe->logformat_error)) { /* Display a log line following the configured error-log-format. 
*/ sess_log(sess); } @@ -520,6 +522,18 @@ int conn_complete_session(struct connection *conn) return -1; } +/* Add <inc> to the number of cumulated glitches in the tracked counters for + * session <sess> which is known for being tracked, and implicitly update the + * rate if also tracked. + */ +void __session_add_glitch_ctr(struct session *sess, uint inc) +{ + int i; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) + stkctr_add_glitch_ctr(&sess->stkctr[i], inc); +} + /* * Local variables: * c-indent-level: 8 diff --git a/src/shctx.c b/src/shctx.c index be59053..931bc4f 100644 --- a/src/shctx.c +++ b/src/shctx.c @@ -16,6 +16,7 @@ #include <import/ebmbtree.h> #include <haproxy/list.h> #include <haproxy/shctx.h> +#include <haproxy/tools.h> /* * Reserve a new row if <first> is null, put it in the hotlist, set the refcount to 1 @@ -269,13 +270,14 @@ int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first, * and 0 if cache is already allocated. */ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize, - unsigned int maxobjsz, int extra) + unsigned int maxobjsz, int extra, const char *name) { int i; struct shared_context *shctx; int ret; void *cur; int maptype = MAP_SHARED; + size_t totalsize = sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)); if (maxblocks <= 0) return 0; @@ -284,14 +286,15 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize, blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *); extra = (extra + sizeof(void *) - 1) & -sizeof(void *); - shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)), - PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0); + shctx = (struct shared_context *)mmap(NULL, totalsize, PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0); if (!shctx || shctx == MAP_FAILED) { shctx = NULL; ret = SHCTX_E_ALLOC_CACHE; goto err; 
} + vma_set_name(shctx, totalsize, "shctx", name); + shctx->nbav = 0; LIST_INIT(&shctx->avail); @@ -87,7 +87,6 @@ static struct sink *__sink_new(const char *name, const char *desc, int fmt) /* address will be filled by the caller if needed */ sink->ctx.fd = -1; sink->ctx.dropped = 0; - HA_RWLOCK_INIT(&sink->ctx.lock); LIST_APPEND(&sink_list, &sink->sink_list); end: return sink; @@ -206,30 +205,79 @@ send: * here with the only difference that we override the log level. This is * possible since the announce message will be sent from the same context. * - * In case of success, the amount of drops is reduced by as much. It's supposed - * to be called under an exclusive lock on the sink to avoid multiple producers - * doing the same. On success, >0 is returned, otherwise <=0 on failure. + * In case of success, the amount of drops is reduced by as much. + * The function ensures that a single thread will do that work at once, other + * ones will only report a failure if a thread is dumping, so that no thread + * waits. A pair od atomic OR and AND is performed around the code so the + * caller would be advised to only call this function AFTER having verified + * that sink->ctx.dropped is not zero in order to avoid a memory write. On + * success, >0 is returned, otherwise <=0 on failure, indicating that it could + * not eliminate the pending drop counter. It may loop up to 10 times trying + * to catch up with failing competing threads. */ int sink_announce_dropped(struct sink *sink, struct log_header hdr) { - unsigned int dropped; - struct buffer msg; + static THREAD_LOCAL char msg_dropped1[] = "1 event dropped"; + static THREAD_LOCAL char msg_dropped2[] = "0000000000 events dropped"; + uint dropped, last_dropped; struct ist msgvec[1]; - char logbuf[64]; + uint retries = 10; + int ret = 0; + + /* Explanation. ctx.dropped is made of: + * bit0 = 1 if dropped dump in progress + * bit1..31 = dropped counter + * If non-zero there have been some drops. 
If not &1, it means + * nobody's taking care of them and we'll have to, otherwise + * another thread is already on them and we can just pass and + * count another drop (hence add 2). + */ + dropped = HA_ATOMIC_FETCH_OR(&sink->ctx.dropped, 1); + if (dropped & 1) { + /* another thread was already on it */ + goto leave; + } - while (unlikely((dropped = sink->ctx.dropped) > 0)) { - chunk_init(&msg, logbuf, sizeof(logbuf)); - chunk_printf(&msg, "%u event%s dropped", dropped, dropped > 1 ? "s" : ""); - msgvec[0] = ist2(msg.area, msg.data); + last_dropped = 0; + dropped >>= 1; + while (1) { + while (unlikely(dropped > last_dropped) && retries-- > 0) { + /* try to aggregate multiple messages if other threads arrive while + * we're producing the dropped message. + */ + uint msglen = sizeof(msg_dropped1); + const char *msg = msg_dropped1; + + last_dropped = dropped; + if (dropped > 1) { + msg = ultoa_r(dropped, msg_dropped2, 11); + msg_dropped2[10] = ' '; + msglen = msg_dropped2 + sizeof(msg_dropped2) - msg; + } + msgvec[0] = ist2(msg, msglen); + dropped = HA_ATOMIC_LOAD(&sink->ctx.dropped) >> 1; + } + if (!dropped) + break; + + last_dropped = 0; hdr.level = LOG_NOTICE; /* override level but keep original log header data */ if (__sink_write(sink, hdr, 0, msgvec, 1) <= 0) - return 0; + goto done; + /* success! */ - HA_ATOMIC_SUB(&sink->ctx.dropped, dropped); + HA_ATOMIC_SUB(&sink->ctx.dropped, dropped << 1); } - return 1; + + /* done! 
*/ + ret = 1; +done: + /* unlock the counter */ + HA_ATOMIC_AND(&sink->ctx.dropped, ~1); +leave: + return ret; } /* parse the "show events" command, returns 1 if a message is returned, otherwise zero */ @@ -284,7 +332,7 @@ static int cli_parse_show_events(char **args, char *payload, struct appctx *appc /* Pre-configures a ring proxy to emit connections */ void sink_setup_proxy(struct proxy *px) { - px->last_change = ns_to_sec(now_ns); + px->be_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_BE; px->maxconn = 0; px->conn_retries = 1; @@ -307,13 +355,12 @@ static void sink_forward_io_handler(struct appctx *appctx) struct sink_forward_target *sft = appctx->svcctx; struct sink *sink = sft->sink; struct ring *ring = sink->ctx.ring; - struct buffer *buf = &ring->buf; - uint64_t msg_len; - size_t len, cnt, ofs, last_ofs; + size_t ofs, last_ofs; int ret = 0; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { goto out; + } /* if stopping was requested, close immediately */ if (unlikely(stopping)) @@ -335,77 +382,14 @@ static void sink_forward_io_handler(struct appctx *appctx) goto close; } - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); - - /* explanation for the initialization below: it would be better to do - * this in the parsing function but this would occasionally result in - * dropped events because we'd take a reference on the oldest message - * and keep it while being scheduled. Thus instead let's take it the - * first time we enter here so that we have a chance to pass many - * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. 
- */ - if (unlikely(sft->ofs == ~0)) { - sft->ofs = b_peek_ofs(buf, 0); - HA_ATOMIC_INC(b_orig(buf) + sft->ofs); - } - - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. - */ - ofs = sft->ofs - b_head_ofs(buf); - if (sft->ofs < b_head_ofs(buf)) - ofs += b_size(buf); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); - - /* in this loop, ofs always points to the counter byte that precedes - * the message so that we can take our reference there if we have to - * stop before the end (ret=0). - */ - ret = 1; - while (ofs + 1 < b_data(buf)) { - cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); - if (!len) - break; - cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - - if (unlikely(msg_len + 1 > b_size(&trash))) { - /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; - } - - chunk_reset(&trash); - len = b_getblk(buf, trash.area, msg_len, ofs + cnt); - trash.data += len; - trash.area[trash.data++] = '\n'; - - if (applet_putchk(appctx, &trash) == -1) { - ret = 0; - break; - } - ofs += cnt + msg_len; - } - - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - sft->ofs = b_peek_ofs(buf, ofs); + MT_LIST_DELETE(&appctx->wait_entry); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + ret = ring_dispatch_messages(ring, appctx, &sft->ofs, &last_ofs, 0, applet_append_line); if (ret) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -437,11 +421,8 @@ static void sink_forward_oc_io_handler(struct appctx *appctx) struct sink_forward_target *sft = appctx->svcctx; struct sink *sink 
= sft->sink; struct ring *ring = sink->ctx.ring; - struct buffer *buf = &ring->buf; - uint64_t msg_len; - size_t len, cnt, ofs, last_ofs; + size_t ofs, last_ofs; int ret = 0; - char *p; if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) goto out; @@ -466,80 +447,13 @@ static void sink_forward_oc_io_handler(struct appctx *appctx) goto close; } - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); - - /* explanation for the initialization below: it would be better to do - * this in the parsing function but this would occasionally result in - * dropped events because we'd take a reference on the oldest message - * and keep it while being scheduled. Thus instead let's take it the - * first time we enter here so that we have a chance to pass many - * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. - */ - if (unlikely(sft->ofs == ~0)) { - sft->ofs = b_peek_ofs(buf, 0); - HA_ATOMIC_INC(b_orig(buf) + sft->ofs); - } - - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. - */ - ofs = sft->ofs - b_head_ofs(buf); - if (sft->ofs < b_head_ofs(buf)) - ofs += b_size(buf); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); - - /* in this loop, ofs always points to the counter byte that precedes - * the message so that we can take our reference there if we have to - * stop before the end (ret=0). 
- */ - ret = 1; - while (ofs + 1 < b_data(buf)) { - cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); - if (!len) - break; - cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - - chunk_reset(&trash); - p = ulltoa(msg_len, trash.area, b_size(&trash)); - if (p) { - trash.data = (p - trash.area) + 1; - *p = ' '; - } - - if (!p || (trash.data + msg_len > b_size(&trash))) { - /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; - } - - trash.data += b_getblk(buf, p + 1, msg_len, ofs + cnt); - - if (applet_putchk(appctx, &trash) == -1) { - ret = 0; - break; - } - ofs += cnt + msg_len; - } - - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - sft->ofs = b_peek_ofs(buf, ofs); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_DELETE(&appctx->wait_entry); + ret = ring_dispatch_messages(ring, appctx, &sft->ofs, &last_ofs, 0, syslog_applet_append_event); if (ret) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -569,9 +483,7 @@ void __sink_forward_session_deinit(struct sink_forward_target *sft) if (!sink) return; - HA_RWLOCK_WRLOCK(RING_LOCK, &sink->ctx.ring->lock); - LIST_DEL_INIT(&sft->appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &sink->ctx.ring->lock); + MT_LIST_DELETE(&sft->appctx->wait_entry); sft->appctx = NULL; task_wakeup(sink->forward_task, TASK_WOKEN_MSG); @@ -728,7 +640,7 @@ int sink_init_forward(struct sink *sink) */ void sink_rotate_file_backed_ring(const char *name) { - struct ring ring; + struct ring_storage storage; char *oldback; int ret; int fd; @@ -738,16 +650,20 @@ void sink_rotate_file_backed_ring(const char 
*name) return; /* check for contents validity */ - ret = read(fd, &ring, sizeof(ring)); + ret = read(fd, &storage, sizeof(storage)); close(fd); - if (ret != sizeof(ring)) + if (ret != sizeof(storage)) goto rotate; + /* check that it's the expected format before touching it */ + if (storage.rsvd != sizeof(storage)) + return; + /* contents are present, we want to keep them => rotate. Note that * an empty ring buffer has one byte (the marker). */ - if (ring.buf.data > 1) + if (storage.head != 0 || storage.tail != 1) goto rotate; /* nothing to keep, let's scratch the file and preserve the backup */ @@ -779,15 +695,14 @@ static void sink_free(struct sink *sink) return; if (sink->type == SINK_TYPE_BUFFER) { if (sink->store) { - size_t size = (sink->ctx.ring->buf.size + 4095UL) & -4096UL; - void *area = (sink->ctx.ring->buf.area - sizeof(*sink->ctx.ring)); + size_t size = (ring_allocated_size(sink->ctx.ring) + 4095UL) & -4096UL; + void *area = ring_allocated_area(sink->ctx.ring); msync(area, size, MS_SYNC); munmap(area, size); ha_free(&sink->store); } - else - ring_free(sink->ctx.ring); + ring_free(sink->ctx.ring); } LIST_DEL_INIT(&sink->sink_list); // remove from parent list task_destroy(sink->forward_task); @@ -914,6 +829,12 @@ static int sink_finalize(struct sink *sink) ha_alert("error when trying to initialize sink buffer forwarding.\n"); err_code |= ERR_ALERT | ERR_FATAL; } + if (!sink->store) { + /* virtual memory backed sink */ + vma_set_name(ring_allocated_area(sink->ctx.ring), + ring_allocated_size(sink->ctx.ring), + "ring", sink->name); + } } return err_code; } @@ -979,22 +900,28 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) goto err; } + if (size > RING_TAIL_LOCK) { + ha_alert("parsing [%s:%d] : too large size '%llu' for new sink buffer, the limit on this platform is %llu bytes.\n", file, linenum, (ullong)size, (ullong)RING_TAIL_LOCK); + err_code |= ERR_ALERT | ERR_FATAL; + goto err; + } + if (cfg_sink->store) { ha_alert("parsing 
[%s:%d] : cannot resize an already mapped file, please specify 'size' before 'backing-file'.\n", file, linenum); err_code |= ERR_ALERT | ERR_FATAL; goto err; } - if (size < cfg_sink->ctx.ring->buf.size) { - ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than current size '%llu' for ring '%s'.\n", - file, linenum, (ullong)size, (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name); + if (size < ring_data(cfg_sink->ctx.ring)) { + ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than contents '%llu' for ring '%s'.\n", + file, linenum, (ullong)size, (ullong)ring_data(cfg_sink->ctx.ring), cfg_sink->name); err_code |= ERR_WARN; goto err; } if (!ring_resize(cfg_sink->ctx.ring, size)) { ha_alert("parsing [%s:%d] : fail to set sink buffer size '%llu' for ring '%s'.\n", file, linenum, - (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name); + (ullong)ring_size(cfg_sink->ctx.ring), cfg_sink->name); err_code |= ERR_ALERT | ERR_FATAL; goto err; } @@ -1034,7 +961,7 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) goto err; } - size = (cfg_sink->ctx.ring->buf.size + 4095UL) & -4096UL; + size = (ring_size(cfg_sink->ctx.ring) + 4095UL) & -4096UL; if (ftruncate(fd, size) != 0) { close(fd); ha_alert("parsing [%s:%d] : could not adjust size of backing-file for ring '%s': %s.\n", file, linenum, cfg_sink->name, strerror(errno)); @@ -1056,7 +983,7 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) /* never fails */ ring_free(cfg_sink->ctx.ring); - cfg_sink->ctx.ring = ring_make_from_area(area, size); + cfg_sink->ctx.ring = ring_make_from_area(area, size, 1); } else if (strcmp(args[0],"server") == 0) { if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) { @@ -30,6 +30,7 @@ #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/namespace.h> +#include <haproxy/protocol-t.h> #include <haproxy/proto_sockpair.h> #include <haproxy/sock.h> #include <haproxy/sock_inet.h> @@ -109,6 
+110,9 @@ struct connection *sock_accept_conn(struct listener *l, int *status) goto fail_conn; } + if (unlikely(port_is_restricted(addr, HA_PROTO_TCP))) + goto fail_conn; + /* Perfect, the connection was accepted */ conn = conn_new(&l->obj_type); if (!conn) @@ -195,14 +199,76 @@ struct connection *sock_accept_conn(struct listener *l, int *status) goto done; } +/* Common code to handle in one place different ERRNOs, that socket() et setns() + * may return + */ +static int sock_handle_system_err(struct connection *conn, struct proxy *be) +{ + qfprintf(stderr, "Cannot get a server socket.\n"); + + conn->flags |= CO_FL_ERROR; + conn->err_code = CO_ER_SOCK_ERR; + + switch(errno) { + case ENFILE: + conn->err_code = CO_ER_SYS_FDLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached system FD limit (maxsock=%d). " + "Please check system tunables.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case EMFILE: + conn->err_code = CO_ER_PROC_FDLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached process FD limit (maxsock=%d). " + "Please check 'ulimit-n' and restart.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case ENOBUFS: + case ENOMEM: + conn->err_code = CO_ER_SYS_MEMLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached system memory limit (maxsock=%d). " + "Please check system tunables.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case EAFNOSUPPORT: + case EPROTONOSUPPORT: + conn->err_code = CO_ER_NOPROTO; + break; + + case EPERM: + conn->err_code = CO_ER_SOCK_ERR; + send_log(be, LOG_EMERG, + "Proxy %s has insufficient permissions to open server socket.\n", + be->id); + + return SF_ERR_PRXCOND; + + default: + send_log(be, LOG_EMERG, + "Proxy %s cannot create a server socket: %s\n", + be->id, strerror(errno)); + } + + return SF_ERR_INTERNAL; +} + /* Create a socket to connect to the server in conn->dst (which MUST be valid), * using the configured namespace if needed, or the one passed by the proxy - * protocol if required to do so. 
It ultimately calls socket() or socketat() - * and returns the FD or error code. + * protocol if required to do so. It then calls socket() or socketat(). On + * success, checks if mark or tos sockopts need to be set on the file handle. + * Returns backend connection socket FD on success, stream_err flag needed by + * upper level is set as SF_ERR_NONE; -1 on failure, stream_err is set to + * appropriate value. */ -int sock_create_server_socket(struct connection *conn) +int sock_create_server_socket(struct connection *conn, struct proxy *be, int *stream_err) { const struct netns_entry *ns = NULL; + int sock_fd; #ifdef USE_NS if (objt_server(conn->target)) { @@ -212,7 +278,60 @@ int sock_create_server_socket(struct connection *conn) ns = __objt_server(conn->target)->netns; } #endif - return my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0); + sock_fd = my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0); + + /* at first, handle common to all proto families system limits and permission related errors */ + if (sock_fd == -1) { + *stream_err = sock_handle_system_err(conn, be); + + return -1; + } + + /* now perform some runtime condition checks */ + if (sock_fd >= global.maxsock) { + /* do not log anything there, it's a normal condition when this option + * is used to serialize connections to a server ! + */ + ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); + send_log(be, LOG_EMERG, "socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); + close(sock_fd); + conn->err_code = CO_ER_CONF_FDLIM; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_PRXCOND; /* it is a configuration limit */ + + return -1; + } + + if (fd_set_nonblock(sock_fd) == -1 || + ((conn->ctrl->sock_prot == IPPROTO_TCP) && (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) { + qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); + send_log(be, LOG_EMERG, "Cannot set client socket to non blocking mode.\n"); + close(sock_fd); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_INTERNAL; + + return -1; + } + + if (master == 1 && fd_set_cloexec(sock_fd) == -1) { + ha_alert("Cannot set CLOEXEC on client socket.\n"); + send_log(be, LOG_EMERG, "Cannot set CLOEXEC on client socket.\n"); + close(sock_fd); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_INTERNAL; + + return -1; + } + + if (conn->flags & CO_FL_OPT_MARK) + sock_set_mark(sock_fd, conn->ctrl->fam->sock_family, conn->mark); + if (conn->flags & CO_FL_OPT_TOS) + sock_set_tos(sock_fd, conn->dst, conn->tos); + + *stream_err = SF_ERR_NONE; + return sock_fd; } /* Enables receiving on receiver <rx> once already bound. */ @@ -807,6 +926,13 @@ int sock_conn_check(struct connection *conn) return 0; wait: + /* we may arrive here due to connect() misleadingly reporting EALREADY + * in some corner cases while the system disagrees and reports an error + * on the FD. + */ + if (fdtab[fd].state & FD_POLL_ERR) + goto out_error; + fd_cant_send(fd); fd_want_send(fd); return 0; diff --git a/src/sock_unix.c b/src/sock_unix.c index ef749a5..0f9bc9a 100644 --- a/src/sock_unix.c +++ b/src/sock_unix.c @@ -255,8 +255,8 @@ int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) } addr.sun_family = AF_UNIX; - /* WT: shouldn't we use my_socketat(rx->netns) here instead ? 
*/ - fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); + fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain, + rx->proto->sock_type, rx->proto->sock_prot); if (fd < 0) { err |= ERR_FATAL | ERR_ALERT; memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); diff --git a/src/ssl_ckch.c b/src/ssl_ckch.c index ebab1f3..b178078 100644 --- a/src/ssl_ckch.c +++ b/src/ssl_ckch.c @@ -28,6 +28,7 @@ #include <haproxy/applet.h> #include <haproxy/base64.h> +#include <haproxy/cfgparse.h> #include <haproxy/channel.h> #include <haproxy/cli.h> #include <haproxy/errors.h> @@ -111,6 +112,7 @@ struct commit_cacrlfile_ctx { enum { CACRL_ST_INIT = 0, CACRL_ST_GEN, + CACRL_ST_CRLCB, CACRL_ST_INSERT, CACRL_ST_SUCCESS, CACRL_ST_FIN, @@ -119,6 +121,18 @@ struct commit_cacrlfile_ctx { }; +/* + * Callback function, which is called if defined after loading CRLs from disk + * when starting HAProxy (function __ssl_store_load_locations_file()), and after + * committing new CRLs via CLI (function cli_io_handler_commit_cafile_crlfile()). + * + * The input parameters of the function are the path for the CRL data and + * a structure containing information about X.509 certificates and CRLs. + * In case of error, returns -1 with an error message in err; or the number + * of revoked certificates (>= 0) otherwise. + */ +int (*ssl_commit_crlfile_cb)(const char *path, X509_STORE *ctx, char **err) = NULL; + /******************** cert_key_and_chain functions ************************* * These are the functions that fills a cert_key_and_chain structure. 
For the * functions filling a SSL_CTX from a cert_key_and_chain, see ssl_sock.c @@ -721,8 +735,27 @@ void ssl_sock_free_cert_key_and_chain_contents(struct ckch_data *data) X509_free(data->ocsp_issuer); data->ocsp_issuer = NULL; - OCSP_CERTID_free(data->ocsp_cid); - data->ocsp_cid = NULL; + + /* We need to properly remove the reference to the corresponding + * certificate_ocsp structure if it exists (which it should). + */ +#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL) + if (data->ocsp_cid) { + struct certificate_ocsp *ocsp = NULL; + unsigned char certid[OCSP_MAX_CERTID_ASN1_LENGTH] = {}; + unsigned int certid_length = 0; + + if (ssl_ocsp_build_response_key(data->ocsp_cid, (unsigned char*)certid, &certid_length) >= 0) { + HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); + ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, certid, OCSP_MAX_CERTID_ASN1_LENGTH); + HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); + ssl_sock_free_ocsp(ocsp); + } + + OCSP_CERTID_free(data->ocsp_cid); + data->ocsp_cid = NULL; + } +#endif } /* @@ -794,8 +827,6 @@ struct ckch_data *ssl_sock_copy_cert_key_and_chain(struct ckch_data *src, dst->ocsp_cid = OCSP_CERTID_dup(src->ocsp_cid); - dst->ocsp_update_mode = src->ocsp_update_mode; - return dst; error: @@ -877,6 +908,9 @@ void ckch_store_free(struct ckch_store *store) ssl_sock_free_cert_key_and_chain_contents(store->data); ha_free(&store->data); + /* free the ckch_conf content */ + ckch_conf_clean(&store->conf); + free(store); } @@ -928,6 +962,9 @@ struct ckch_store *ckchs_dup(const struct ckch_store *src) if (!ssl_sock_copy_cert_key_and_chain(src->data, dst->data)) goto error; + + dst->conf.ocsp_update_mode = src->conf.ocsp_update_mode; + return dst; error: @@ -953,7 +990,7 @@ struct ckch_store *ckchs_lookup(char *path) /* * This function allocate a ckch_store and populate it with certificates from files. 
*/ -struct ckch_store *ckchs_load_cert_file(char *path, char **err) +struct ckch_store *ckch_store_new_load_files_path(char *path, char **err) { struct ckch_store *ckchs; @@ -966,6 +1003,8 @@ struct ckch_store *ckchs_load_cert_file(char *path, char **err) if (ssl_sock_load_files_into_ckch(path, ckchs->data, err) == 1) goto end; + ckchs->conf.used = CKCH_CONF_SET_EMPTY; + /* insert into the ckchs tree */ memcpy(ckchs->path, path, strlen(path) + 1); ebst_insert(&ckchs_tree, &ckchs->node); @@ -977,6 +1016,51 @@ end: return NULL; } +/* + * This function allocate a ckch_store and populate it with certificates using + * the ckch_conf structure. + */ +struct ckch_store *ckch_store_new_load_files_conf(char *name, struct ckch_conf *conf, char **err) +{ + struct ckch_store *ckchs; + int cfgerr = ERR_NONE; + char *tmpcrt = conf->crt; + + ckchs = ckch_store_new(name); + if (!ckchs) { + memprintf(err, "%sunable to allocate memory.\n", err && *err ? *err : ""); + goto end; + } + + /* this is done for retro-compatibility. When no "filename" crt-store + * options were configured in a crt-list, try to load the files by + * auto-detecting them. 
*/ + if ((conf->used == CKCH_CONF_SET_EMPTY || conf->used == CKCH_CONF_SET_CRTLIST) && + (!conf->key && !conf->ocsp && !conf->issuer && !conf->sctl)) { + cfgerr = ssl_sock_load_files_into_ckch(conf->crt, ckchs->data, err); + if (cfgerr & ERR_FATAL) + goto end; + /* set conf->crt to NULL so it's not erased */ + conf->crt = NULL; + } + + /* load files using the ckch_conf */ + cfgerr = ckch_store_load_files(conf, ckchs, 0, err); + if (cfgerr & ERR_FATAL) + goto end; + + conf->crt = tmpcrt; + + /* insert into the ckchs tree */ + memcpy(ckchs->path, name, strlen(name) + 1); + ebst_insert(&ckchs_tree, &ckchs->node); + return ckchs; + +end: + ckch_store_free(ckchs); + + return NULL; +} /******************** ckch_inst functions ******************************/ @@ -1383,6 +1467,14 @@ scandir_err: goto err; } + if (ssl_commit_crlfile_cb != NULL) { + if (ssl_commit_crlfile_cb(path, store, NULL) == -1) { + if (!shuterror) + ha_alert("crl-file: couldn't load '%s'\n", path); + goto err; + } + } + objs = X509_STORE_get0_objects(store); cert_count = sk_X509_OBJECT_num(objs); if (cert_count == 0) { @@ -1978,7 +2070,7 @@ int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi, if (ckchi->is_server_instance) errcode |= ckch_inst_new_load_srv_store(ckch_store->path, ckch_store, new_inst, err); else - errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, new_inst, err); + errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, ckchi->is_default, new_inst, err); if (errcode & ERR_CODE) return 1; @@ -2115,16 +2207,11 @@ void ckch_store_replace(struct ckch_store *old_ckchs, struct ckch_store *new_ckc static int cli_io_handler_commit_cert(struct appctx *appctx) { struct commit_cert_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); int y = 0; struct ckch_store *old_ckchs, *new_ckchs = NULL; struct ckch_inst *ckchi; 
usermsgs_clr("CLI"); - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - while (1) { switch (ctx->state) { case CERT_ST_INIT: @@ -2801,17 +2888,12 @@ error: static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx) { struct commit_cacrlfile_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); int y = 0; struct cafile_entry *old_cafile_entry = ctx->old_entry; struct cafile_entry *new_cafile_entry = ctx->new_entry; struct ckch_inst_link *ckchi_link; char *path; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - /* The ctx was already validated by the ca-file/crl-file parsing * function. Entries can only be NULL in CACRL_ST_SUCCESS or * CACRL_ST_FIN states @@ -2888,6 +2970,15 @@ static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx) y++; } + ctx->state = CACRL_ST_CRLCB; + __fallthrough; + case CACRL_ST_CRLCB: + if ((ctx->cafile_type == CAFILE_CRL) && (ssl_commit_crlfile_cb != NULL)) { + if (ssl_commit_crlfile_cb(crlfile_transaction.path, crlfile_transaction.new_crlfile_entry->ca_store, &ctx->err) == -1) { + ctx->state = CACRL_ST_ERROR; + goto error; + } + } ctx->state = CACRL_ST_INSERT; __fallthrough; case CACRL_ST_INSERT: @@ -3947,3 +4038,544 @@ static struct cli_kw_list cli_kws = {{ },{ INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); +static char *current_crtbase = NULL; +static char *current_keybase = NULL; +static int crtstore_load = 0; /* did we already load in this crt-store */ + +struct ckch_conf_kws ckch_conf_kws[] = { + { "alias", -1, PARSE_TYPE_NONE, NULL, NULL }, + { "crt", offsetof(struct ckch_conf, crt), PARSE_TYPE_STR, ckch_conf_load_pem, ¤t_crtbase }, + { "key", offsetof(struct ckch_conf, key), PARSE_TYPE_STR, ckch_conf_load_key, ¤t_keybase }, + { "ocsp", offsetof(struct ckch_conf, ocsp), PARSE_TYPE_STR, ckch_conf_load_ocsp_response, ¤t_crtbase }, + { "issuer", offsetof(struct 
ckch_conf, issuer), PARSE_TYPE_STR, ckch_conf_load_ocsp_issuer, ¤t_crtbase }, + { "sctl", offsetof(struct ckch_conf, sctl), PARSE_TYPE_STR, ckch_conf_load_sctl, ¤t_crtbase }, + { "ocsp-update", offsetof(struct ckch_conf, ocsp_update_mode), PARSE_TYPE_ONOFF, ocsp_update_init, NULL }, + { NULL, -1, PARSE_TYPE_STR, NULL, NULL } +}; + +/* crt-store does not try to find files, but use the stored filename */ +int ckch_store_load_files(struct ckch_conf *f, struct ckch_store *c, int cli, char **err) +{ + int i; + int err_code = 0; + int rc = 1; + struct ckch_data *d = c->data; + + for (i = 0; ckch_conf_kws[i].name; i++) { + void *src = NULL; + + if (ckch_conf_kws[i].offset < 0) + continue; + + if (!ckch_conf_kws[i].func) + continue; + + src = (void *)((intptr_t)f + (ptrdiff_t)ckch_conf_kws[i].offset); + + switch (ckch_conf_kws[i].type) { + case PARSE_TYPE_STR: + { + char *v; + char *path; + char **base = ckch_conf_kws[i].base; + char path_base[PATH_MAX]; + + v = *(char **)src; + if (!v) + goto next; + + path = v; + if (base && *base && *path != '/') { + int rv = snprintf(path_base, sizeof(path_base), "%s/%s", *base, path); + if (rv >= sizeof(path_base)) { + memprintf(err, "'%s/%s' : path too long", *base, path); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + path = path_base; + } + rc = ckch_conf_kws[i].func(path, NULL, d, cli, err); + if (rc) { + err_code |= ERR_ALERT | ERR_FATAL; + memprintf(err, "%s '%s' cannot be read or parsed.", err && *err ? *err : "", path); + goto out; + } + break; + } + + case PARSE_TYPE_INT: + case PARSE_TYPE_ONOFF: + { + int v = *(int *)src; + rc = ckch_conf_kws[i].func(&v, NULL, d, cli, err); + if (rc) { + err_code |= ERR_ALERT | ERR_FATAL; + memprintf(err, "%s '%d' cannot be read or parsed.", err && *err ? 
*err : "", v); + goto out; + } + + break; + } + + default: + break; + } +next: + ; + } + +out: + if (err_code & ERR_FATAL) + ssl_sock_free_cert_key_and_chain_contents(d); + ERR_clear_error(); + + return err_code; +} + +/* Parse a local crt-base or key-base for a crt-store */ +static int crtstore_parse_path_base(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, + const char *file, int linenum, char **err) +{ + int err_code = ERR_NONE; + + if (!*args[1]) { + memprintf(err, "parsing [%s:%d] : '%s' requires a <path> argument.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + if (crtstore_load) { + memprintf(err, "parsing [%s:%d] : '%s' can't be used after a load line, use it at the beginning of the section.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + if (args[0][1] == 'r') { + /* crt-base */ + free(current_crtbase); + current_crtbase = strdup(args[1]); + } else if (args[0][1] == 'e') { + /* key-base */ + free(current_keybase); + current_keybase = strdup(args[1]); + } +out: + return err_code; +} + +/* + * Check if ckch_conf <prev> and <new> are compatible: + * + * new \ prev | EMPTY | CRTLIST | CRTSTORE + * ---------------------------------------- + * EMPTY | OK | X | OK + * ---------------------------------------- + * CRTLIST | X | CMP | CMP + * ---------------------------------------- + * + * Return: + * 1 when the 2 structures have different variables or are incompatible + * 0 when the 2 structures have equal variables or are compatibles + */ +int ckch_conf_cmp(struct ckch_conf *prev, struct ckch_conf *new, char **err) +{ + int ret = 0; + int i; + + if (!prev || !new) + return 1; + + /* compatibility check */ + + if (prev->used == CKCH_CONF_SET_EMPTY) { + if (new->used == CKCH_CONF_SET_CRTLIST) { + memprintf(err, "%sCan't use the certificate previously defined without any keyword with these keywords:\n", *err ? 
*err : ""); + ret = 1; + } + if (new->used == CKCH_CONF_SET_EMPTY) + return 0; + + } else if (prev->used == CKCH_CONF_SET_CRTLIST) { + if (new->used == CKCH_CONF_SET_EMPTY) { + memprintf(err, "%sCan't use the certificate previously defined with keywords without these keywords:\n", *err ? *err : ""); + ret = 1; + } + } else if (prev->used == CKCH_CONF_SET_CRTSTORE) { + if (new->used == CKCH_CONF_SET_EMPTY) + return 0; + } + + + for (i = 0; ckch_conf_kws[i].name != NULL; i++) { + + if (strcmp(ckch_conf_kws[i].name, "crt") == 0) + continue; + + switch (ckch_conf_kws[i].type) { + case PARSE_TYPE_STR: { + char *avail1, *avail2; + avail1 = *(char **)((intptr_t)prev + (ptrdiff_t)ckch_conf_kws[i].offset); + avail2 = *(char **)((intptr_t)new + (ptrdiff_t)ckch_conf_kws[i].offset); + + /* must alert when strcmp is wrong, or when one of the field is NULL */ + if (((avail1 && avail2) && strcmp(avail1, avail2) != 0) || (!!avail1 ^ !!avail2)) { + memprintf(err, "%s- different parameter '%s' : previously '%s' vs '%s'\n", *err ? *err : "", ckch_conf_kws[i].name, avail1, avail2); + ret = 1; + } + } + break; + + default: + break; + } + /* special case for ocsp-update and default */ + if (strcmp(ckch_conf_kws[i].name, "ocsp-update") == 0) { + int o1, o2; /* ocsp-update from the configuration */ + int q1, q2; /* final ocsp-update value (from default) */ + + + o1 = *(int *)((intptr_t)prev + (ptrdiff_t)ckch_conf_kws[i].offset); + o2 = *(int *)((intptr_t)new + (ptrdiff_t)ckch_conf_kws[i].offset); + + q1 = (o1 == SSL_SOCK_OCSP_UPDATE_DFLT) ? global_ssl.ocsp_update.mode : o1; + q2 = (o2 == SSL_SOCK_OCSP_UPDATE_DFLT) ? global_ssl.ocsp_update.mode : o2; + + if (q1 != q2) { + int j = 1; + int o = o1; + int q = q1; + memprintf(err, "%s- different parameter '%s' : previously ", *err ? *err : "", ckch_conf_kws[i].name); + + do { + switch (o) { + case SSL_SOCK_OCSP_UPDATE_DFLT: + memprintf(err, "%s'default' (ocsp-update.mode %s)", *err ? *err : "", (q > 0) ? 
"on" : "off"); + break; + case SSL_SOCK_OCSP_UPDATE_ON: + memprintf(err, "%s'%s'", *err ? *err : "", "on"); + break; + case SSL_SOCK_OCSP_UPDATE_OFF: + memprintf(err, "%s'%s'", *err ? *err : "", "off"); + break; + } + o = o2; + q = q2; + if (j) + memprintf(err, "%s vs ", *err ? *err : ""); + } while (j--); + memprintf(err, "%s\n", *err ? *err : ""); + ret = 1; + } + } + } + +out: + return ret; +} + +/* + * Compare a previously generated ckch_conf with an empty one, using ckch_conf_cmp(). + */ +int ckch_conf_cmp_empty(struct ckch_conf *prev, char **err) +{ + struct ckch_conf new = {}; + + return ckch_conf_cmp(prev, &new, err); +} + +/* parse ckch_conf keywords for crt-list */ +int ckch_conf_parse(char **args, int cur_arg, struct ckch_conf *f, int *found, const char *file, int linenum, char **err) +{ + int i; + int err_code = 0; + + for (i = 0; ckch_conf_kws[i].name != NULL; i++) { + if (strcmp(ckch_conf_kws[i].name, args[cur_arg]) == 0) { + void *target; + *found = 1; + target = (char **)((intptr_t)f + (ptrdiff_t)ckch_conf_kws[i].offset); + + if (ckch_conf_kws[i].type == PARSE_TYPE_STR) { + char **t = target; + + *t = strdup(args[cur_arg + 1]); + if (!*t) { + ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum); + err_code |= ERR_ALERT | ERR_ABORT; + goto out; + } + } else if (ckch_conf_kws[i].type == PARSE_TYPE_INT) { + int *t = target; + char *stop; + + *t = strtol(args[cur_arg + 1], &stop, 10); + if (*stop != '\0') { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', an integer is expected.\n", + file, linenum, args[cur_arg], args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } else if (ckch_conf_kws[i].type == PARSE_TYPE_ONOFF) { + int *t = target; + + if (strcmp(args[cur_arg + 1], "on") == 0) { + *t = 1; + } else if (strcmp(args[cur_arg + 1], "off") == 0) { + *t = -1; + } else { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', 'on' or 'off' is expected.\n", + file, linenum, args[cur_arg], 
args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } + break; + } + } +out: + return err_code; +} + +/* freeing the content of a ckch_conf structure */ +void ckch_conf_clean(struct ckch_conf *conf) +{ + free(conf->crt); + free(conf->key); + free(conf->ocsp); + free(conf->issuer); + free(conf->sctl); +} + +static char current_crtstore_name[PATH_MAX] = {}; + +static int crtstore_parse_load(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, + const char *file, int linenum, char **err) +{ + int err_code = 0; + int cur_arg = 0; + struct ckch_conf f = {}; + struct ckch_store *c = NULL; + char store_path[PATH_MAX]; /* complete path with crt_base */ + char alias_name[PATH_MAX]; /* complete alias name with the store prefix '@/' */ + char *final_name = NULL; /* name used as a key in the ckch_store */ + + cur_arg++; /* skip "load" */ + + while (*(args[cur_arg])) { + int found = 0; + + if (strcmp("alias", args[cur_arg]) == 0) { + int rv; + + if (*args[cur_arg + 1] == '/') { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', '/' is forbidden as the first character.\n", + file, linenum, args[cur_arg], args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + rv = snprintf(alias_name, sizeof(alias_name), "@%s/%s", current_crtstore_name, args[cur_arg + 1]); + if (rv >= sizeof(alias_name)) { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', too long, max len is %zd.\n", + file, linenum, args[cur_arg], args[cur_arg + 1], sizeof(alias_name)); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = alias_name; + found = 1; + } else { + err_code |= ckch_conf_parse(args, cur_arg, &f, &found, file, linenum, err); + if (err_code & ERR_FATAL) + goto out; + } + + if (!found) { + memprintf(err,"parsing [%s:%d] : '%s %s' in section 'crt-store': unknown keyword '%s'.", + file, linenum, args[0], args[cur_arg],args[cur_arg]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + 
cur_arg += 2; + } + + if (!f.crt) { + memprintf(err,"parsing [%s:%d] : '%s' in section 'crt-store': mandatory 'crt' parameter not found.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + crtstore_load = 1; + + if (!final_name) { + final_name = f.crt; + + /* if no alias was used: + * - when a crt-store exists, use @store/crt + * - or use the absolute file (crt_base + crt) + * - or the relative file when no crt_base exists + */ + if (current_crtstore_name[0] != '\0') { + int rv; + + /* add the crt-store name, avoid a double / if the crt starts by it */ + rv = snprintf(alias_name, sizeof(alias_name), "@%s%s%s", current_crtstore_name, f.crt[0] != '/' ? "/" : "", f.crt); + if (rv >= sizeof(alias_name)) { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', too long, max len is %zd.\n", + file, linenum, args[cur_arg], args[cur_arg + 1], sizeof(alias_name)); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = alias_name; + } else if (global_ssl.crt_base && *f.crt != '/') { + int rv; + /* When no crt_store name, complete the name in the ckch_tree with 'crt-base' */ + + rv = snprintf(store_path, sizeof(store_path), "%s/%s", global_ssl.crt_base, f.crt); + if (rv >= sizeof(store_path)) { + memprintf(err, "'%s/%s' : path too long", global_ssl.crt_base, f.crt); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = store_path; + } + } + /* process and insert the ckch_store */ + c = ckch_store_new(final_name); + if (!c) + goto alloc_error; + + err_code |= ckch_store_load_files(&f, c, 0, err); + if (err_code & ERR_FATAL) + goto out; + + c->conf = f; + c->conf.used = CKCH_CONF_SET_CRTSTORE; + + if (ebst_insert(&ckchs_tree, &c->node) != &c->node) { + memprintf(err,"parsing [%s:%d] : '%s' in section 'crt-store': store '%s' was already defined.", + file, linenum, args[0], c->path); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + +out: + /* free ckch_conf content */ + if (err_code & ERR_FATAL) + 
ckch_store_free(c); + return err_code; + +alloc_error: + ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum); + err_code |= ERR_ALERT | ERR_ABORT; + goto out; +} + +/* + * Parse "crt-store" section and create corresponding ckch_stores. + * + * The function returns 0 in success case, otherwise, it returns error + * flags. + */ +static int cfg_parse_crtstore(const char *file, int linenum, char **args, int kwm) +{ + struct cfg_kw_list *kwl; + const char *best; + int index; + int rc = 0; + int err_code = 0; + char *errmsg = NULL; + + if (strcmp(args[0], "crt-store") == 0) { /* new crt-store section */ + if (!*args[1]) { + current_crtstore_name[0] = '\0'; + } else { + rc = snprintf(current_crtstore_name, sizeof(current_crtstore_name), "%s", args[1]); + if (rc >= sizeof(current_crtstore_name)) { + ha_alert("parsing [%s:%d] : 'crt-store' <name> argument is too long.\n", file, linenum); + current_crtstore_name[0] = '\0'; + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + } + + if (*args[2]) { + ha_alert("parsing [%s:%d] : 'crt-store' section only supports a <name> argument.\n", file, linenum); + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + /* copy the crt_base and key_base */ + ha_free(¤t_crtbase); + if (global_ssl.crt_base) + current_crtbase = strdup(global_ssl.crt_base); + ha_free(¤t_keybase); + if (global_ssl.key_base) + current_keybase = strdup(global_ssl.key_base); + crtstore_load = 0; + + goto out; + } + + list_for_each_entry(kwl, &cfg_keywords.list, list) { + for (index = 0; kwl->kw[index].kw != NULL; index++) { + if (kwl->kw[index].section != CFG_CRTSTORE) + continue; + if (strcmp(kwl->kw[index].kw, args[0]) == 0) { + if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) { + ha_alert("%s\n", errmsg); + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + + /* prepare error message just in case */ + rc = kwl->kw[index].parse(args, CFG_CRTSTORE, NULL, NULL, file, linenum, &errmsg); + if (rc & 
ERR_ALERT) { + ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg); + err_code |= rc; + goto out; + } + else if (rc & ERR_WARN) { + ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg); + err_code |= rc; + goto out; + } + goto out; + } + } + } + + best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_CRTSTORE, NULL); + if (best) + ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best); + else + ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + +out: + if (err_code & ERR_FATAL) + err_code |= ERR_ABORT; + free(errmsg); + return err_code; +} + +static int cfg_post_parse_crtstore() +{ + current_crtstore_name[0] = '\0'; + ha_free(¤t_crtbase); + ha_free(¤t_keybase); + + return ERR_NONE; +} + +REGISTER_CONFIG_SECTION("crt-store", cfg_parse_crtstore, cfg_post_parse_crtstore); + +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_CRTSTORE, "crt-base", crtstore_parse_path_base }, + { CFG_CRTSTORE, "key-base", crtstore_parse_path_base }, + { CFG_CRTSTORE, "load", crtstore_parse_load }, + { 0, NULL, NULL }, +}}; +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/ssl_crtlist.c b/src/ssl_crtlist.c index d788bec..71fa0a0 100644 --- a/src/ssl_crtlist.c +++ b/src/ssl_crtlist.c @@ -356,7 +356,7 @@ struct crtlist *crtlist_new(const char *filename, int unique) * <crt_path> is a ptr in <line> * Return an error code */ -int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err) +int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, struct ckch_conf *cc, const char *file, int linenum, int from_cli, char **err) { int cfgerr = 0; int arg, newarg, cur_arg, i, ssl_b = 0, ssl_e = 0; @@ -438,19 +438,22 @@ int crtlist_parse_line(char *line, char **crt_path, 
struct crtlist_entry *entry, cfgerr |= ERR_WARN; } - ssl_conf = calloc(1, sizeof *ssl_conf); - if (!ssl_conf) { - memprintf(err, "not enough memory!"); - cfgerr |= ERR_ALERT | ERR_FATAL; - goto error; - } } cur_arg = ssl_b ? ssl_b : 1; while (cur_arg < ssl_e) { newarg = 0; + /* look for ssl_conf keywords */ for (i = 0; ssl_crtlist_kws[i].kw != NULL; i++) { if (strcmp(ssl_crtlist_kws[i].kw, args[cur_arg]) == 0) { + if (!ssl_conf) + ssl_conf = calloc(1, sizeof *ssl_conf); + if (!ssl_conf) { + memprintf(err, "not enough memory!"); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } + newarg = 1; cfgerr |= ssl_crtlist_kws[i].parse(args, cur_arg, NULL, ssl_conf, from_cli, err); if (cur_arg + 1 + ssl_crtlist_kws[i].skip > ssl_e) { @@ -460,9 +463,22 @@ int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, goto error; } cur_arg += 1 + ssl_crtlist_kws[i].skip; - break; + goto out; } } + if (cc) { + /* look for ckch_conf keywords */ + cfgerr |= ckch_conf_parse(args, cur_arg, cc, &newarg, file, linenum, err); + if (cfgerr & ERR_FATAL) + goto error; + + if (newarg) { + cur_arg += 2; /* skip 2 words if the keyword was found */ + cc->used = CKCH_CONF_SET_CRTLIST; /* if they are options they must be used everywhere */ + } + + } +out: if (!cfgerr && !newarg) { memprintf(err, "parsing [%s:%d]: unknown ssl keyword %s", file, linenum, args[cur_arg]); @@ -521,6 +537,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu char *crt_path; char path[MAXPATHLEN+1]; struct ckch_store *ckchs; + struct ckch_conf cc = {}; int found = 0; if (missing_lf != -1) { @@ -562,7 +579,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu goto error; } - cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, file, linenum, 0, err); + cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, &cc, file, linenum, 0, err); if (cfgerr & ERR_CODE) goto error; @@ -573,7 +590,7 @@ int crtlist_parse_file(char *file, struct 
bind_conf *bind_conf, struct proxy *cu continue; } - if (*crt_path != '/' && global_ssl.crt_base) { + if (*crt_path != '@' && *crt_path != '/' && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(crt_path)) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, crt_path) > sizeof(path)) { memprintf(err, "parsing [%s:%d]: '%s' : path too long", @@ -589,17 +606,18 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu if (ckchs == NULL) { if (stat(crt_path, &buf) == 0) { found++; - - ckchs = ckchs_load_cert_file(crt_path, err); + free(cc.crt); + cc.crt = strdup(crt_path); + ckchs = ckch_store_new_load_files_conf(crt_path, &cc, err); if (ckchs == NULL) { cfgerr |= ERR_ALERT | ERR_FATAL; goto error; } + ckchs->conf = cc; + entry->node.key = ckchs; entry->crtlist = newlist; - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry->node); LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store); @@ -614,6 +632,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu char fp[MAXPATHLEN+1] = {0}; int n = 0; struct crtlist_entry *entry_dup = entry; /* use the previous created entry */ + for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) { struct stat buf; int ret; @@ -625,7 +644,13 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu ckchs = ckchs_lookup(fp); if (!ckchs) { if (stat(fp, &buf) == 0) { - ckchs = ckchs_load_cert_file(fp, err); + + if (cc.used) { + memprintf(err, "%sCan't load '%s'. Using crt-store keyword is not compatible with multi certificates bundle.\n", + err && *err ? 
*err : "", crt_path); + cfgerr |= ERR_ALERT | ERR_FATAL; + } + ckchs = ckch_store_new_load_files_path(fp, err); if (!ckchs) { cfgerr |= ERR_ALERT | ERR_FATAL; goto error; @@ -649,12 +674,6 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu entry_dup->node.key = ckchs; entry_dup->crtlist = newlist; - cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err); - if (cfgerr & ERR_FATAL) - goto error; - - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry_dup->node); LIST_APPEND(&newlist->ord_entries, &entry_dup->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry_dup->by_ckch_store); @@ -676,15 +695,15 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu } } else { + if (ckch_conf_cmp(&ckchs->conf, &cc, err) != 0) { + memprintf(err, "'%s' in crt-list '%s' line %d, is already defined with incompatible parameters:\n %s", crt_path, file, linenum, err ? 
*err : ""); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } + entry->node.key = ckchs; entry->crtlist = newlist; - cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err); - if (cfgerr & ERR_FATAL) - goto error; - - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry->node); LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store); @@ -711,6 +730,8 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu error: crtlist_entry_free(entry); + /* FIXME: free cc */ + fclose(f); crtlist_free(newlist); return cfgerr; @@ -774,7 +795,7 @@ int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlis ckchs = ckchs_lookup(fp); if (ckchs == NULL) - ckchs = ckchs_load_cert_file(fp, err); + ckchs = ckch_store_new_load_files_path(fp, err); if (ckchs == NULL) { free(de); free(entry); @@ -808,21 +829,27 @@ end: * Take an ssl_bind_conf structure and append the configuration line used to * create it in the buffer */ -static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf *conf) +static void dump_crtlist_conf(struct buffer *buf, const struct ssl_bind_conf *conf, const struct ckch_conf *cc) { int space = 0; - if (conf == NULL) + if (conf == NULL && cc->used == 0) return; chunk_appendf(buf, " ["); + + + if (conf == NULL) + goto dump_ckch; + + /* first dump all ssl_conf keywords */ + #ifdef OPENSSL_NPN_NEGOTIATED if (conf->npn_str) { int len = conf->npn_len; char *ptr = conf->npn_str; int comma = 0; - if (space) chunk_appendf(buf, " "); chunk_appendf(buf, "npn "); while (len) { unsigned short size; @@ -941,13 +968,23 @@ static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf space++; } - if (conf->ocsp_update != SSL_SOCK_OCSP_UPDATE_DFLT) { + /* then dump the ckch_conf */ +dump_ckch: + if (!cc->used) + goto end; + + if (cc->ocsp_update_mode == 
SSL_SOCK_OCSP_UPDATE_OFF) { + if (space) chunk_appendf(buf, " "); + chunk_appendf(buf, "ocsp-update off"); + space++; + } else if (cc->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { if (space) chunk_appendf(buf, " "); - chunk_appendf(buf, "ocsp-update %s", - conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_OFF ? "off" : "on"); + chunk_appendf(buf, "ocsp-update on"); space++; } +end: + chunk_appendf(buf, "]"); return; @@ -1030,7 +1067,7 @@ static int cli_io_handler_dump_crtlist_entries(struct appctx *appctx) chunk_appendf(trash, "%s", filename); if (ctx->mode == 's') /* show */ chunk_appendf(trash, ":%d", entry->linenum); - dump_crtlist_sslconf(trash, entry->ssl_conf); + dump_crtlist_conf(trash, entry->ssl_conf, &store->conf); dump_crtlist_filters(trash, entry); chunk_appendf(trash, "\n"); @@ -1128,7 +1165,6 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) { struct add_crtlist_ctx *ctx = appctx->svcctx; struct bind_conf_list *bind_conf_node; - struct stconn *sc = appctx_sc(appctx); struct crtlist *crtlist = ctx->crtlist; struct crtlist_entry *entry = ctx->entry; struct ckch_store *store = entry->node.key; @@ -1139,10 +1175,6 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) /* for each bind_conf which use the crt-list, a new ckch_inst must be * created. 
*/ - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - switch (ctx->state) { case ADDCRT_ST_INIT: /* This state just print the update message */ @@ -1173,7 +1205,7 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) /* we don't support multi-cert bundles, only simple ones */ ctx->err = NULL; - errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &new_inst, &ctx->err); + errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, 0, &new_inst, &ctx->err); if (errcode & ERR_CODE) { ctx->state = ADDCRT_ST_ERROR; goto error; @@ -1265,6 +1297,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc struct ebpt_node *inserted; struct crtlist *crtlist; struct crtlist_entry *entry = NULL; + struct ckch_conf cc = {}; char *end; if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) @@ -1295,6 +1328,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } + if (payload) { char *lf; @@ -1304,7 +1338,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } /* cert_path is filled here */ - cfgerr |= crtlist_parse_line(payload, &cert_path, entry, "CLI", 1, 1, &err); + cfgerr |= crtlist_parse_line(payload, &cert_path, entry, &cc, "CLI", 1, 1, &err); if (cfgerr & ERR_CODE) goto error; } else { @@ -1335,7 +1369,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc *slash = '/'; } - if (*cert_path != '/' && global_ssl.crt_base) { + if (*cert_path != '@' && *cert_path != '/' && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(cert_path)) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, cert_path) > sizeof(path)) { memprintf(&err, "'%s' : path too long", cert_path); @@ -1355,15 +1389,23 @@ static int 
cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } - /* No need to check 'ocsp-update' inconsistency on a store that is not - * used yet (it was just added through the CLI for instance). + /* We can use a crt-store keyword when: + * - no ckch_inst are linked OR + * - ckch_inst are linked but exact same ckch_conf is used. */ - if (!LIST_ISEMPTY(&store->ckch_inst) && - ocsp_update_check_cfg_consistency(store, entry, cert_path, &err)) - goto error; + if (LIST_ISEMPTY(&store->ckch_inst)) { - if (entry->ssl_conf) - store->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; + store->conf = cc; + /* fresh new, run more init (for example init ocsp-update tasks) */ + cfgerr |= ckch_store_load_files(&cc, store, 1, &err); + if (cfgerr & ERR_FATAL) + goto error; + + } else if (ckch_conf_cmp(&store->conf, &cc, &err) != 0) { + memprintf(&err, "'%s' is already instantiated with incompatible parameters:\n %s", cert_path, err ? err : ""); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } /* check if it's possible to insert this new crtlist_entry */ entry->node.key = store; @@ -1374,8 +1416,8 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc } /* this is supposed to be a directory (EB_ROOT_UNIQUE), so no ssl_conf are allowed */ - if ((entry->ssl_conf || entry->filters) && eb_gettag(crtlist->entries.b[EB_RGHT])) { - memprintf(&err, "this is a directory, SSL configuration and filters are not allowed"); + if ((entry->ssl_conf || entry->filters || cc.used) && eb_gettag(crtlist->entries.b[EB_RGHT])) { + memprintf(&err, "this is a directory, SSL configuration, crt-store keywords and filters are not allowed"); goto error; } @@ -1391,6 +1433,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc return 0; error: + ckch_conf_clean(&cc); crtlist_entry_free(entry); HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock); err = memprintf(&err, "Can't edit the crt-list: %s\n", err ? 
err : ""); @@ -1567,4 +1610,3 @@ static struct cli_kw_list cli_kws = {{ },{ }; INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); - diff --git a/src/ssl_gencert.c b/src/ssl_gencert.c new file mode 100644 index 0000000..44dc82c --- /dev/null +++ b/src/ssl_gencert.c @@ -0,0 +1,470 @@ +/* + * SSL 'generate-certificate' option logic. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#include <import/lru.h> + +#include <haproxy/errors.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/ssl_ckch.h> +#include <haproxy/ssl_sock.h> +#include <haproxy/xxhash.h> + +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) +/* X509V3 Extensions that will be added on generated certificates */ +#define X509V3_EXT_SIZE 5 +static char *x509v3_ext_names[X509V3_EXT_SIZE] = { + "basicConstraints", + "nsComment", + "subjectKeyIdentifier", + "authorityKeyIdentifier", + "keyUsage", +}; +static char *x509v3_ext_values[X509V3_EXT_SIZE] = { + "CA:FALSE", + "\"OpenSSL Generated Certificate\"", + "hash", + "keyid,issuer:always", + "nonRepudiation,digitalSignature,keyEncipherment" +}; +/* LRU cache to store generated certificate */ +static struct lru64_head *ssl_ctx_lru_tree = NULL; +static unsigned int ssl_ctx_lru_seed = 0; +static unsigned int ssl_ctx_serial; +__decl_rwlock(ssl_ctx_lru_rwlock); + +#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME + +#ifndef SSL_NO_GENERATE_CERTIFICATES + +/* Configure a DNS SAN extension on a certificate. 
*/ +int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) { + int failure = 0; + X509_EXTENSION *san_ext = NULL; + CONF *conf = NULL; + struct buffer *san_name = get_trash_chunk(); + + conf = NCONF_new(NULL); + if (!conf) { + failure = 1; + goto cleanup; + } + + /* Build an extension based on the DNS entry above */ + chunk_appendf(san_name, "DNS:%s", servername); + san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area); + if (!san_ext) { + failure = 1; + goto cleanup; + } + + /* Add the extension */ + if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) { + failure = 1; + goto cleanup; + } + + /* Success */ + failure = 0; + +cleanup: + if (NULL != san_ext) X509_EXTENSION_free(san_ext); + if (NULL != conf) NCONF_free(conf); + + return failure; +} + +/* Create a X509 certificate with the specified servername and serial. This + * function returns a SSL_CTX object or NULL if an error occurs. */ +static SSL_CTX *ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl) +{ + X509 *cacert = bind_conf->ca_sign_ckch->cert; + EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key; + SSL_CTX *ssl_ctx = NULL; + X509 *newcrt = NULL; + EVP_PKEY *pkey = NULL; + SSL *tmp_ssl = NULL; + CONF *ctmp = NULL; + X509_NAME *name; + const EVP_MD *digest; + X509V3_CTX ctx; + unsigned int i; + int key_type; + struct sni_ctx *sni_ctx; + + sni_ctx = ssl_sock_chose_sni_ctx(bind_conf, "", 1, 1); + if (!sni_ctx) + goto mkcert_error; + + /* Get the private key of the default certificate and use it */ +#ifdef HAVE_SSL_CTX_get0_privatekey + pkey = SSL_CTX_get0_privatekey(sni_ctx->ctx); +#else + tmp_ssl = SSL_new(sni_ctx->ctx); + if (tmp_ssl) + pkey = SSL_get_privatekey(tmp_ssl); +#endif + if (!pkey) + goto mkcert_error; + + /* Create the certificate */ + if (!(newcrt = X509_new())) + goto mkcert_error; + + /* Set version number for the certificate (X509v3) and the serial + * number */ + if (X509_set_version(newcrt, 2L) != 1) + 
goto mkcert_error; + ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1)); + + /* Set duration for the certificate */ + if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) || + !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365)) + goto mkcert_error; + + /* set public key in the certificate */ + if (X509_set_pubkey(newcrt, pkey) != 1) + goto mkcert_error; + + /* Set issuer name from the CA */ + if (!(name = X509_get_subject_name(cacert))) + goto mkcert_error; + if (X509_set_issuer_name(newcrt, name) != 1) + goto mkcert_error; + + /* Set the subject name using the same, but the CN */ + name = X509_NAME_dup(name); + if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, + (const unsigned char *)servername, + -1, -1, 0) != 1) { + X509_NAME_free(name); + goto mkcert_error; + } + if (X509_set_subject_name(newcrt, name) != 1) { + X509_NAME_free(name); + goto mkcert_error; + } + X509_NAME_free(name); + + /* Add x509v3 extensions as specified */ + ctmp = NCONF_new(NULL); + X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0); + for (i = 0; i < X509V3_EXT_SIZE; i++) { + X509_EXTENSION *ext; + + if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i]))) + goto mkcert_error; + if (!X509_add_ext(newcrt, ext, -1)) { + X509_EXTENSION_free(ext); + goto mkcert_error; + } + X509_EXTENSION_free(ext); + } + + /* Add SAN extension */ + if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) { + goto mkcert_error; + } + + /* Sign the certificate with the CA private key */ + + key_type = EVP_PKEY_base_id(capkey); + + if (key_type == EVP_PKEY_DSA) + digest = EVP_sha1(); + else if (key_type == EVP_PKEY_RSA) + digest = EVP_sha256(); + else if (key_type == EVP_PKEY_EC) + digest = EVP_sha256(); + else { +#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID + int nid; + + if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0) + goto mkcert_error; + if (!(digest = EVP_get_digestbynid(nid))) + goto mkcert_error; +#else + 
goto mkcert_error; +#endif + } + + if (!(X509_sign(newcrt, capkey, digest))) + goto mkcert_error; + + /* Create and set the new SSL_CTX */ + if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method()))) + goto mkcert_error; + + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ssl_ctx, global_ssl.security_level); + + if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey)) + goto mkcert_error; + if (!SSL_CTX_use_certificate(ssl_ctx, newcrt)) + goto mkcert_error; + if (!SSL_CTX_check_private_key(ssl_ctx)) + goto mkcert_error; + + /* Build chaining the CA cert and the rest of the chain, keep these order */ +#if defined(SSL_CTX_add1_chain_cert) + if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) { + goto mkcert_error; + } + + if (bind_conf->ca_sign_ckch->chain) { + for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) { + X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i); + if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) { + goto mkcert_error; + } + } + } +#endif + + if (newcrt) X509_free(newcrt); + +#ifndef OPENSSL_NO_DH +#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) + SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk); +#else + ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey); +#endif +#endif + +#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) +#if defined(SSL_CTX_set1_curves_list) + { + const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); + if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe)) + goto end; + } +#endif +#else +#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) + { + const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? 
bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); + EC_KEY *ecc; + int nid; + + if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef) + goto end; + if (!(ecc = EC_KEY_new_by_curve_name(nid))) + goto end; + SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc); + EC_KEY_free(ecc); + } +#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */ +#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */ + end: + return ssl_ctx; + + mkcert_error: + if (ctmp) NCONF_free(ctmp); + if (tmp_ssl) SSL_free(tmp_ssl); + if (ssl_ctx) SSL_CTX_free(ssl_ctx); + if (newcrt) X509_free(newcrt); + return NULL; +} + + +/* Do a lookup for a certificate in the LRU cache used to store generated + * certificates and immediately assign it to the SSL session if not null. */ +SSL_CTX *ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl) +{ + struct lru64 *lru = NULL; + + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); + if (lru && lru->domain) { + if (ssl) + SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return (SSL_CTX *)lru->data; + } + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + } + return NULL; +} + +/* Same as <ssl_sock_assign_generated_cert> but without SSL session. This + * function is not thread-safe, it should only be used to check if a certificate + * exists in the lru cache (with no warranty it will not be removed by another + * thread). It is kept for backward compatibility. */ +SSL_CTX * +ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf) +{ + return ssl_sock_assign_generated_cert(key, bind_conf, NULL); +} + +/* Set a certificate int the LRU cache used to store generated + * certificate. 
Return 0 on success, otherwise -1 */ +int ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf) +{ + struct lru64 *lru = NULL; + + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); + if (!lru) { + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return -1; + } + if (lru->domain && lru->data) + lru->free((SSL_CTX *)lru->data); + lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return 0; + } + return -1; +} + +/* Compute the key of the certificate. */ +unsigned int +ssl_sock_generated_cert_key(const void *data, size_t len) +{ + return XXH32(data, len, ssl_ctx_lru_seed); +} + +/* Generate a cert and immediately assign it to the SSL session so that the cert's + * refcount is maintained regardless of the cert's presence in the LRU cache. 
+ */ +int ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl) +{ + X509 *cacert = bind_conf->ca_sign_ckch->cert; + SSL_CTX *ssl_ctx = NULL; + struct lru64 *lru = NULL; + unsigned int key; + + key = ssl_sock_generated_cert_key(servername, strlen(servername)); + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0); + if (lru && lru->domain) + ssl_ctx = (SSL_CTX *)lru->data; + if (!ssl_ctx && lru) { + ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); + lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free); + } + SSL_set_SSL_CTX(ssl, ssl_ctx); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return 1; + } + else { + ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); + SSL_set_SSL_CTX(ssl, ssl_ctx); + /* No LRU cache, this CTX will be released as soon as the session dies */ + SSL_CTX_free(ssl_ctx); + return 1; + } + return 0; +} +int ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl) +{ + unsigned int key; + struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index); + + if (conn_get_dst(conn)) { + key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst)); + if (ssl_sock_assign_generated_cert(key, bind_conf, ssl)) + return 1; + } + return 0; +} + +/* Load CA cert file and private key used to generate certificates */ +int +ssl_sock_load_ca(struct bind_conf *bind_conf) +{ + struct proxy *px = bind_conf->frontend; + struct ckch_data *data = NULL; + int ret = 0; + char *err = NULL; + + if (!(bind_conf->options & BC_O_GENERATE_CERTS)) + return ret; + +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) + if (global_ssl.ctx_cache) { + ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache); + } + ssl_ctx_lru_seed = (unsigned int)time(NULL); + ssl_ctx_serial = now_ms; +#endif + + if (!bind_conf->ca_sign_file) { + 
ha_alert("Proxy '%s': cannot enable certificate generation, " + "no CA certificate File configured at [%s:%d].\n", + px->id, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Allocate cert structure */ + data = calloc(1, sizeof(*data)); + if (!data) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Try to parse file */ + if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, data, &err)) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain loading failed: %s\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err); + free(err); + goto failed; + } + + /* Fail if missing cert or pkey */ + if ((!data->cert) || (!data->key)) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Final assignment to bind */ + bind_conf->ca_sign_ckch = data; + return ret; + + failed: + if (data) { + ssl_sock_free_cert_key_and_chain_contents(data); + free(data); + } + + bind_conf->options &= ~BC_O_GENERATE_CERTS; + ret++; + return ret; +} + +/* Release CA cert and private key used to generate certificated */ +void +ssl_sock_free_ca(struct bind_conf *bind_conf) +{ + if (bind_conf->ca_sign_ckch) { + ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch); + ha_free(&bind_conf->ca_sign_ckch); + } +} + +#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */ + + +static void __ssl_gencert_deinit(void) +{ +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) + if (ssl_ctx_lru_tree) { + lru64_destroy(ssl_ctx_lru_tree); + HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock); + } +#endif +} +REGISTER_POST_DEINIT(__ssl_gencert_deinit); + diff --git a/src/ssl_ocsp.c b/src/ssl_ocsp.c index 
5b103af..7d3a485 100644 --- a/src/ssl_ocsp.c +++ b/src/ssl_ocsp.c @@ -33,13 +33,6 @@ #include <string.h> #include <unistd.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <netdb.h> -#include <netinet/tcp.h> - -#include <import/ebpttree.h> #include <import/ebsttree.h> #include <import/lru.h> @@ -47,44 +40,27 @@ #include <haproxy/applet.h> #include <haproxy/arg.h> #include <haproxy/base64.h> -#include <haproxy/channel.h> +#include <haproxy/cfgparse.h> #include <haproxy/chunk.h> #include <haproxy/cli.h> #include <haproxy/connection.h> -#include <haproxy/dynbuf.h> #include <haproxy/errors.h> -#include <haproxy/fd.h> #include <haproxy/freq_ctr.h> #include <haproxy/frontend.h> #include <haproxy/global.h> -#include <haproxy/http_rules.h> +#include <haproxy/http_client.h> +#include <haproxy/istbuf.h> #include <haproxy/log.h> #include <haproxy/openssl-compat.h> -#include <haproxy/pattern-t.h> -#include <haproxy/proto_tcp.h> #include <haproxy/proxy.h> -#include <haproxy/sample.h> -#include <haproxy/sc_strm.h> -#include <haproxy/quic_conn.h> -#include <haproxy/quic_tp.h> -#include <haproxy/server.h> #include <haproxy/shctx.h> #include <haproxy/ssl_ckch.h> -#include <haproxy/ssl_crtlist.h> +#include <haproxy/ssl_ocsp-t.h> #include <haproxy/ssl_sock.h> #include <haproxy/ssl_utils.h> -#include <haproxy/stats.h> -#include <haproxy/stconn.h> -#include <haproxy/stream-t.h> #include <haproxy/task.h> #include <haproxy/ticks.h> #include <haproxy/time.h> -#include <haproxy/tools.h> -#include <haproxy/vars.h> -#include <haproxy/xxhash.h> -#include <haproxy/istbuf.h> -#include <haproxy/ssl_ocsp-t.h> -#include <haproxy/http_client.h> /* ***** READ THIS before adding code here! ***** @@ -98,6 +74,8 @@ * to conditionally define it in openssl-compat.h than using lots of ifdefs. 
*/ +static struct sockaddr_storage *ocsp_update_dst; + #ifndef OPENSSL_NO_OCSP int ocsp_ex_index = -1; @@ -383,6 +361,25 @@ int ssl_sock_update_ocsp_response(struct buffer *ocsp_response, char **err) #if !defined OPENSSL_IS_BORINGSSL /* + * Must be called under ocsp_tree_lock lock. + */ +static void ssl_sock_free_ocsp_data(struct certificate_ocsp *ocsp) +{ + ebmb_delete(&ocsp->key); + eb64_delete(&ocsp->next_update); + X509_free(ocsp->issuer); + ocsp->issuer = NULL; + sk_X509_pop_free(ocsp->chain, X509_free); + ocsp->chain = NULL; + chunk_destroy(&ocsp->response); + if (ocsp->uri) { + ha_free(&ocsp->uri->area); + ha_free(&ocsp->uri); + } + free(ocsp); +} + +/* * Decrease the refcount of the struct ocsp_response and frees it if it's not * used anymore. Also removes it from the tree if free'd. */ @@ -392,21 +389,37 @@ void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp) return; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); + ocsp->refcount_store--; + if (ocsp->refcount_store <= 0) { + eb64_delete(&ocsp->next_update); + /* Might happen if some ongoing requests kept using an SSL_CTX + * that referenced this OCSP response after the corresponding + * ckch_store was deleted or changed (via cli commands for + * instance). 
+ */ + if (ocsp->refcount <= 0) + ssl_sock_free_ocsp_data(ocsp); + } + HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); +} + +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp) +{ + if (!ocsp) + return; + + HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); ocsp->refcount--; if (ocsp->refcount <= 0) { - ebmb_delete(&ocsp->key); eb64_delete(&ocsp->next_update); - X509_free(ocsp->issuer); - ocsp->issuer = NULL; - sk_X509_pop_free(ocsp->chain, X509_free); - ocsp->chain = NULL; - chunk_destroy(&ocsp->response); - if (ocsp->uri) { - ha_free(&ocsp->uri->area); - ha_free(&ocsp->uri); - } + /* Might happen if some ongoing requests kept using an SSL_CTX + * that referenced this OCSP response after the corresponding + * ckch_store was deleted or changed (via cli commands for + * instance). + */ + if (ocsp->refcount_store <= 0) + ssl_sock_free_ocsp_data(ocsp); - free(ocsp); } HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); } @@ -626,13 +639,13 @@ void ssl_sock_ocsp_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int id ocsp_arg = ptr; if (ocsp_arg->is_single) { - ssl_sock_free_ocsp(ocsp_arg->s_ocsp); + ssl_sock_free_ocsp_instance(ocsp_arg->s_ocsp); ocsp_arg->s_ocsp = NULL; } else { int i; for (i = 0; i < SSL_SOCK_NUM_KEYTYPES; i++) { - ssl_sock_free_ocsp(ocsp_arg->m_ocsp[i]); + ssl_sock_free_ocsp_instance(ocsp_arg->m_ocsp[i]); ocsp_arg->m_ocsp[i] = NULL; } } @@ -907,7 +920,7 @@ static int ssl_ocsp_task_schedule() } REGISTER_POST_CHECK(ssl_ocsp_task_schedule); -void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp); +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp); void ssl_destroy_ocsp_update_task(void) { @@ -929,7 +942,7 @@ void ssl_destroy_ocsp_update_task(void) task_destroy(ocsp_update_task); ocsp_update_task = NULL; - ssl_sock_free_ocsp(ssl_ocsp_task_ctx.cur_ocsp); + ssl_sock_free_ocsp_instance(ssl_ocsp_task_ctx.cur_ocsp); ssl_ocsp_task_ctx.cur_ocsp = NULL; if (ssl_ocsp_task_ctx.hc) { @@ -966,12 +979,6 @@ static inline void 
ssl_ocsp_set_next_update(struct certificate_ocsp *ocsp) */ int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp) { - /* This entry was only supposed to be updated once, it does not need to - * be reinserted into the update tree. - */ - if (ocsp->update_once) - return 0; - /* Set next_update based on current time and the various OCSP * minimum/maximum update times. */ @@ -980,7 +987,12 @@ int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp) ocsp->fail_count = 0; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); - eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->updating = 0; + /* An entry with update_once set to 1 was only supposed to be updated + * once, it does not need to be reinserted into the update tree. + */ + if (!ocsp->update_once) + eb64_insert(&ocsp_update_tree, &ocsp->next_update); HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); return 0; @@ -997,12 +1009,6 @@ int ssl_ocsp_update_insert_after_error(struct certificate_ocsp *ocsp) { int replay_delay = 0; - /* This entry was only supposed to be updated once, it does not need to - * be reinserted into the update tree. - */ - if (ocsp->update_once) - return 0; - /* * Set next_update based on current time and the various OCSP * minimum/maximum update times. @@ -1025,7 +1031,12 @@ int ssl_ocsp_update_insert_after_error(struct certificate_ocsp *ocsp) ocsp->next_update.key = date.tv_sec + replay_delay; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); - eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->updating = 0; + /* An entry with update_once set to 1 was only supposed to be updated + * once, it does not need to be reinserted into the update tree. 
+ */ + if (!ocsp->update_once) + eb64_insert(&ocsp_update_tree, &ocsp->next_update); HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); return 0; @@ -1077,10 +1088,8 @@ void ocsp_update_response_end_cb(struct httpclient *hc) /* - * Send a log line that will mimic this previously used logformat : - * char ocspupdate_log_format[] = "%ci:%cp [%tr] %ft %[ssl_ocsp_certname] \ - * %[ssl_ocsp_status] %{+Q}[ssl_ocsp_status_str] %[ssl_ocsp_fail_cnt] \ - * %[ssl_ocsp_success_cnt]"; + * Send a log line that will contain only OCSP update related information: + * "<proxy_name> <ssl_ocsp_certname> <ocsp_status> \"<ocsp_status_str>\" <ocsp_fail_cnt> <ocsp_success_cnt>" * We can't use the regular sess_log function because we don't have any control * over the stream and session used by the httpclient which might not exist * anymore by the time we call this function. @@ -1090,8 +1099,6 @@ static void ssl_ocsp_send_log() int status_str_len = 0; char *status_str = NULL; struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; - struct tm tm; - char timebuf[25]; if (!httpclient_ocsp_update_px) return; @@ -1101,11 +1108,7 @@ static void ssl_ocsp_send_log() status_str = istptr(ocsp_update_errors[ssl_ocsp_task_ctx.update_status]); } - get_localtime(date.tv_sec, &tm); - date2str_log(timebuf, &tm, &date, 25); - - send_log(httpclient_ocsp_update_px, LOG_INFO, "-:- [%s] %s %s %u \"%.*s\" %u %u", - timebuf, + send_log(httpclient_ocsp_update_px, LOG_NOTICE, "%s %s %u \"%.*s\" %u %u", httpclient_ocsp_update_px->id, ocsp->path, ssl_ocsp_task_ctx.update_status, @@ -1211,7 +1214,7 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, /* Reinsert the entry into the update list so that it can be updated later */ ssl_ocsp_update_insert(ocsp); /* Release the reference kept on the updated ocsp response. 
*/ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); ctx->cur_ocsp = NULL; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); @@ -1255,6 +1258,7 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, eb64_delete(&ocsp->next_update); ++ocsp->refcount; + ocsp->updating = 1; ctx->cur_ocsp = ocsp; ocsp->last_update_status = OCSP_UPDT_UNKNOWN; @@ -1293,6 +1297,15 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, goto leave; } + /* if the ocsp_update.http_proxy option was set */ + if (ocsp_update_dst) { + hc->flags |= HC_F_HTTPPROXY; + if (!sockaddr_alloc(&hc->dst, ocsp_update_dst, sizeof(*ocsp_update_dst))) { + ha_alert("ocsp-update: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__); + goto leave; + } + } + if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, b_data(req_body) ? ocsp_request_hdrs : NULL, b_data(req_body) ? ist2(b_orig(req_body), b_data(req_body)) : IST_NULL) != ERR_NONE) { @@ -1321,7 +1334,7 @@ leave: ++ctx->cur_ocsp->num_failure; ssl_ocsp_update_insert_after_error(ctx->cur_ocsp); /* Release the reference kept on the updated ocsp response. */ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); ctx->cur_ocsp = NULL; } if (hc) @@ -1350,7 +1363,7 @@ http_error: if (hc) httpclient_stop_and_destroy(hc); /* Release the reference kept on the updated ocsp response. 
*/ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); /* Set next_wakeup to the new first entry of the tree */ eb = eb64_first(&ocsp_update_tree); @@ -1378,11 +1391,11 @@ static int ssl_ocsp_update_precheck() /* initialize the OCSP update dedicated httpclient */ httpclient_ocsp_update_px = httpclient_create_proxy("<OCSP-UPDATE>"); if (!httpclient_ocsp_update_px) - return 1; - httpclient_ocsp_update_px->conf.logformat_string = httpclient_log_format; + return ERR_RETRYABLE; + httpclient_ocsp_update_px->logformat.str = httpclient_log_format; httpclient_ocsp_update_px->options2 |= PR_O2_NOLOGNORM; - return 0; + return ERR_NONE; } /* initialize the proxy and servers for the HTTP client */ @@ -1433,13 +1446,24 @@ static int cli_parse_update_ocsp_response(char **args, char *payload, struct app goto end; } - update_once = (ocsp->next_update.node.leaf_p == NULL); - eb64_delete(&ocsp->next_update); + /* No need to try to update this response, it is already being updated. */ + if (!ocsp->updating) { + update_once = (ocsp->next_update.node.leaf_p == NULL); + eb64_delete(&ocsp->next_update); - /* Insert the entry at the beginning of the update tree. */ - ocsp->next_update.key = 0; - eb64_insert(&ocsp_update_tree, &ocsp->next_update); - ocsp->update_once = update_once; + /* Insert the entry at the beginning of the update tree. + * We don't need to increase the reference counter on the + * certificate_ocsp structure because we would not have a way to + * decrease it afterwards since this update operation is asynchronous. + * If the corresponding entry were to be destroyed before the update can + * be performed, which is pretty unlikely, it would not be such a + * problem because that would mean that the OCSP response is not + * actually used. 
+ */ + ocsp->next_update.key = 0; + eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->update_once = update_once; + } HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); @@ -1675,20 +1699,12 @@ yield: #endif } -/* Check if the ckch_store and the entry does have the same configuration */ -int ocsp_update_check_cfg_consistency(struct ckch_store *store, struct crtlist_entry *entry, char *crt_path, char **err) +static void cli_release_show_ocspresponse(struct appctx *appctx) { - int err_code = ERR_NONE; - - if (store->data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_DFLT || entry->ssl_conf) { - if ((!entry->ssl_conf && store->data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) - || (entry->ssl_conf && entry->ssl_conf->ocsp_update != SSL_SOCK_OCSP_UPDATE_OFF && - store->data->ocsp_update_mode != entry->ssl_conf->ocsp_update)) { - memprintf(err, "%sIncompatibilities found in OCSP update mode for certificate %s\n", err && *err ? *err : "", crt_path); - err_code |= ERR_ALERT | ERR_FATAL; - } - } - return err_code; + struct show_ocspresp_cli_ctx *ctx = appctx->svcctx; + + if (ctx) + ssl_sock_free_ocsp_instance(ctx->ocsp); } struct show_ocsp_updates_ctx { @@ -1845,98 +1861,168 @@ static void cli_release_show_ocsp_updates(struct appctx *appctx) HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); } +static int ssl_parse_global_ocsp_maxdelay(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int value = 0; + + if (*(args[1]) == 0) { + memprintf(err, "'%s' expects an integer argument.", args[0]); + return -1; + } + + value = atoi(args[1]); + if (value < 0) { + memprintf(err, "'%s' expects a positive numeric value.", args[0]); + return -1; + } + + if (global_ssl.ocsp_update.delay_min > value) { + memprintf(err, "'%s' can not be lower than tune.ssl.ocsp-update.mindelay.", args[0]); + return -1; + } + + global_ssl.ocsp_update.delay_max = value; + + return 0; +} -static int -smp_fetch_ssl_ocsp_certid(const struct arg 
*args, struct sample *smp, const char *kw, void *private) +static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct buffer *data = get_trash_chunk(); - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int value = 0; - if (!ocsp) - return 0; + if (*(args[1]) == 0) { + memprintf(err, "'%s' expects an integer argument.", args[0]); + return -1; + } + + value = atoi(args[1]); + if (value < 0) { + memprintf(err, "'%s' expects a positive numeric value.", args[0]); + return -1; + } - dump_binary(data, (char *)ocsp->key_data, ocsp->key_length); + if (value > global_ssl.ocsp_update.delay_max) { + memprintf(err, "'%s' can not be higher than tune.ssl.ocsp-update.maxdelay.", args[0]); + return -1; + } - smp->data.type = SMP_T_STR; - smp->data.u.str = *data; - return 1; + global_ssl.ocsp_update.delay_min = value; + + return 0; } -static int -smp_fetch_ssl_ocsp_certname(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int ssl_parse_global_ocsp_update_mode(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + if (!*args[1]) { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - if (!ocsp) - return 0; + if (strcmp(args[1], "on") == 0) + global_ssl.ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_ON; + else if (strcmp(args[1], "off") == 0) + global_ssl.ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_OFF; + else { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - smp->data.type = SMP_T_STR; - smp->data.u.str.area = ocsp->path; - smp->data.u.str.data = strlen(ocsp->path); - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_status(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int 
ssl_parse_global_ocsp_update_disable(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + if (!*args[1]) { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - if (!ocsp) - return 0; + if (strcmp(args[1], "on") == 0) + global_ssl.ocsp_update.disable = 1; + else if (strcmp(args[1], "off") == 0) + global_ssl.ocsp_update.disable = 0; + else { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ssl_ocsp_task_ctx.update_status; - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_status_str(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int ocsp_update_parse_global_http_proxy(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + struct sockaddr_storage *sk; + char *errmsg = NULL; - if (!ocsp) - return 0; + if (too_many_args(1, args, err, NULL)) + return -1; - if (ssl_ocsp_task_ctx.update_status >= OCSP_UPDT_ERR_LAST) - return 0; + sockaddr_free(&ocsp_update_dst); + /* 'sk' is statically allocated (no need to be freed). 
*/ + sk = str2sa_range(args[1], NULL, NULL, NULL, NULL, NULL, NULL, + &errmsg, NULL, NULL, + PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT); + if (!sk) { + ha_alert("ocsp-update: Failed to parse destination address in %s\n", errmsg); + free(errmsg); + return -1; + } - smp->data.type = SMP_T_STR; - smp->data.u.str = ist2buf(ocsp_update_errors[ssl_ocsp_task_ctx.update_status]); + if (!sockaddr_alloc(&ocsp_update_dst, sk, sizeof(*sk))) { + ha_alert("ocsp-update: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__); + return -1; + } - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +int ocsp_update_init(void *value, char *buf, struct ckch_data *d, int cli, char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int ocsp_update_mode = *(int *)value; + int ret = 0; - if (!ocsp) - return 0; + /* inherit from global section */ + ocsp_update_mode = (ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_DFLT) ? 
global_ssl.ocsp_update.mode : ocsp_update_mode; - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ocsp->num_failure; - return 1; + if (!global_ssl.ocsp_update.disable && ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + /* We might need to create the main ocsp update task */ + ret = ssl_create_ocsp_update_task(err); + } + + return ret; } -static int -smp_fetch_ssl_ocsp_success_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +int ocsp_update_postparser_init() { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int ret = 0; + char *err = NULL; - if (!ocsp) - return 0; + /* if the global ocsp-update.mode option is not set to "on", there is + * no need to start the task, it would have been started when parsing a + * crt-store or a crt-list */ + if (!global_ssl.ocsp_update.disable && (global_ssl.ocsp_update.mode == SSL_SOCK_OCSP_UPDATE_ON)) { + /* We might need to create the main ocsp update task */ + ret = ssl_create_ocsp_update_task(&err); + } - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ocsp->num_success; - return 1; + return ret; } static struct cli_kw_list cli_kws = {{ },{ { { "set", "ssl", "ocsp-response", NULL }, "set ssl ocsp-response <resp|payload> : update a certificate's OCSP Response from a base64-encode DER", cli_parse_set_ocspresponse, NULL }, - { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [[text|base64] id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response (in text or base64 format)", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, NULL }, + { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [[text|base64] id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response (in text or base64 format)", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, cli_release_show_ocspresponse }, { { "show", "ssl", "ocsp-updates", NULL }, "show ssl ocsp-updates : display 
information about the next 'nb' ocsp responses that will be updated automatically", cli_parse_show_ocsp_updates, cli_io_handler_show_ocsp_updates, cli_release_show_ocsp_updates }, #if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL) { { "update", "ssl", "ocsp-response", NULL }, "update ssl ocsp-response <certfile> : send ocsp request and update stored ocsp response", cli_parse_update_ocsp_response, NULL, NULL }, @@ -1946,27 +2032,22 @@ static struct cli_kw_list cli_kws = {{ },{ INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); - -/* Note: must not be declared <const> as its list will be overwritten. - * Please take care of keeping this list alphabetically sorted. - * - * Those fetches only have a valid value during an OCSP update process so they - * can only be used in a log format of a log line built by the update process - * task itself. - */ -static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { - { "ssl_ocsp_certid", smp_fetch_ssl_ocsp_certid, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_certname", smp_fetch_ssl_ocsp_certname, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_status", smp_fetch_ssl_ocsp_status, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { "ssl_ocsp_status_str", smp_fetch_ssl_ocsp_status_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_fail_cnt", smp_fetch_ssl_ocsp_fail_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { "ssl_ocsp_success_cnt", smp_fetch_ssl_ocsp_success_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { NULL, NULL, 0, 0, 0 }, +static struct cfg_kw_list cfg_kws = {ILH, { +#ifndef OPENSSL_NO_OCSP + { CFG_GLOBAL, "ocsp-update.disable", ssl_parse_global_ocsp_update_disable }, + { CFG_GLOBAL, "tune.ssl.ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, + { CFG_GLOBAL, "ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, + { CFG_GLOBAL, "tune.ssl.ocsp-update.mindelay", ssl_parse_global_ocsp_mindelay }, + { CFG_GLOBAL, "ocsp-update.mindelay", 
ssl_parse_global_ocsp_mindelay }, + { CFG_GLOBAL, "ocsp-update.mode", ssl_parse_global_ocsp_update_mode }, + { CFG_GLOBAL, "ocsp-update.httpproxy", ocsp_update_parse_global_http_proxy }, +#endif + { 0, NULL, NULL }, }}; -INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords); - +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +REGISTER_CONFIG_POSTPARSER("ocsp-update", ocsp_update_postparser_init); /* * Local variables: * c-indent-level: 8 diff --git a/src/ssl_sample.c b/src/ssl_sample.c index 22b4072..0757c12 100644 --- a/src/ssl_sample.c +++ b/src/ssl_sample.c @@ -219,6 +219,10 @@ static inline int sample_check_arg_base64(struct arg *arg, char **err) static int check_aes_gcm(struct arg *args, struct sample_conv *conv, const char *file, int line, char **err) { + if (conv->kw[8] == 'd') + /* flag it as "aes_gcm_dec" */ + args[0].type_flags = 1; + switch(args[0].data.sint) { case 128: case 192: @@ -238,7 +242,8 @@ static int check_aes_gcm(struct arg *args, struct sample_conv *conv, memprintf(err, "failed to parse key : %s", *err); return 0; } - if (!sample_check_arg_base64(&args[3], err)) { + if ((args[0].type_flags && !sample_check_arg_base64(&args[3], err)) || + (!args[0].type_flags && !vars_check_arg(&args[3], err))) { memprintf(err, "failed to parse aead_tag : %s", *err); return 0; } @@ -246,13 +251,37 @@ static int check_aes_gcm(struct arg *args, struct sample_conv *conv, return 1; } +#define sample_conv_aes_gcm_init(a, b, c, d, e, f) \ + ({ \ + int _ret = (a) ? \ + EVP_DecryptInit_ex(b, c, d, e, f) : \ + EVP_EncryptInit_ex(b, c, d, e, f); \ + _ret; \ + }) + +#define sample_conv_aes_gcm_update(a, b, c, d, e, f) \ + ({ \ + int _ret = (a) ? \ + EVP_DecryptUpdate(b, c, d, e, f) : \ + EVP_EncryptUpdate(b, c, d, e, f); \ + _ret; \ + }) + +#define sample_conv_aes_gcm_final(a, b, c, d) \ + ({ \ + int _ret = (a) ? 
\ + EVP_DecryptFinal_ex(b, c, d) : \ + EVP_EncryptFinal_ex(b, c, d); \ + _ret; \ + }) + /* Arguments: AES size in bits, nonce, key, tag. The last three arguments are base64 encoded */ -static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, void *private) +static int sample_conv_aes_gcm(const struct arg *arg_p, struct sample *smp, void *private) { struct sample nonce, key, aead_tag; struct buffer *smp_trash = NULL, *smp_trash_alloc = NULL; EVP_CIPHER_CTX *ctx = NULL; - int dec_size, ret; + int size, ret, dec; smp_trash_alloc = alloc_trash_chunk(); if (!smp_trash_alloc) @@ -278,30 +307,33 @@ static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, goto err; if (arg_p[1].type == ARGT_VAR) { - dec_size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size); - if (dec_size < 0) + size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size); + if (size < 0) goto err; - smp_trash->data = dec_size; + smp_trash->data = size; nonce.data.u.str = *smp_trash; } + /* encrypt (0) or decrypt (1) */ + dec = (arg_p[0].type_flags == 1); + /* Set cipher type and mode */ switch(arg_p[0].data.sint) { case 128: - EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_128_gcm(), NULL, NULL, NULL); break; case 192: - EVP_DecryptInit_ex(ctx, EVP_aes_192_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_192_gcm(), NULL, NULL, NULL); break; case 256: - EVP_DecryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); break; } EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, nonce.data.u.str.data, NULL); /* Initialise IV */ - if(!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area)) + if(!sample_conv_aes_gcm_init(dec, ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area)) goto err; 
smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt); @@ -309,42 +341,66 @@ static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, goto err; if (arg_p[2].type == ARGT_VAR) { - dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size); - if (dec_size < 0) + size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size); + if (size < 0) goto err; - smp_trash->data = dec_size; + smp_trash->data = size; key.data.u.str = *smp_trash; } /* Initialise key */ - if (!EVP_DecryptInit_ex(ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL)) + if (!sample_conv_aes_gcm_init(dec, ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL)) goto err; - if (!EVP_DecryptUpdate(ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data, - (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data)) + if (!sample_conv_aes_gcm_update(dec, ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data, + (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data)) goto err; smp_set_owner(&aead_tag, smp->px, smp->sess, smp->strm, smp->opt); - if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag)) - goto err; - - if (arg_p[3].type == ARGT_VAR) { - dec_size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, smp_trash_alloc->size); - if (dec_size < 0) + if (dec) { + if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag)) goto err; - smp_trash_alloc->data = dec_size; - aead_tag.data.u.str = *smp_trash_alloc; - } - dec_size = smp_trash->data; + if (arg_p[3].type == ARGT_VAR) { + size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, + smp_trash_alloc->size); + if (size < 0) + goto err; + smp_trash_alloc->data = size; + aead_tag.data.u.str = *smp_trash_alloc; + } - EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, (void *) aead_tag.data.u.str.area); - ret = 
EVP_DecryptFinal_ex(ctx, (unsigned char *) smp_trash->area + smp_trash->data, (int *) &smp_trash->data); + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, + (void *) aead_tag.data.u.str.area); + } + + size = smp_trash->data; + ret = sample_conv_aes_gcm_final(dec, ctx, (unsigned char *) smp_trash->area + smp_trash->data, + (int *) &smp_trash->data); if (ret <= 0) goto err; - smp->data.u.str.data = dec_size + smp_trash->data; + if (!dec) { + struct buffer *trash = get_trash_chunk(); + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, (void *) trash->area); + + aead_tag.data.u.str = *smp_trash_alloc; + ret = a2base64(trash->area, 16, aead_tag.data.u.str.area, aead_tag.data.u.str.size); + if (ret < 0) + goto err; + + aead_tag.data.u.str.data = ret; + aead_tag.data.type = SMP_T_STR; + + if (!var_set(arg_p[3].data.var.name_hash, arg_p[3].data.var.scope, &aead_tag, + (arg_p[3].data.var.scope == SCOPE_PROC) ? VF_COND_IFEXISTS : 0)) { + goto err; + } + } + + smp->data.u.str.data = size + smp_trash->data; smp->data.u.str.area = smp_trash->area; smp->data.type = SMP_T_BIN; smp_dup(smp); @@ -1317,61 +1373,61 @@ smp_fetch_ssl_fc_is_resumed(const struct arg *args, struct sample *smp, const ch static int smp_fetch_ssl_fc_ec(const struct arg *args, struct sample *smp, const char *kw, void *private) { - struct connection *conn; - SSL *ssl; - int __maybe_unused nid; - char *curve_name; - - if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) - conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL; - else - conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) : - smp->strm ? sc_conn(smp->strm->scb) : NULL; - - ssl = ssl_sock_get_ssl_object(conn); - if (!ssl) - return 0; - - /* - * SSL_get0_group_name is a function to get the curve name and is available from - * OpenSSL v3.2 onwards. For OpenSSL >=3.0 and <3.2, we will continue to use - * SSL_get_negotiated_group to get the curve name. 
- */ - #if (HA_OPENSSL_VERSION_NUMBER >= 0x3020000fL) - curve_name = (char *)SSL_get0_group_name(ssl); - if (curve_name == NULL) - return 0; - else { - /** - * The curve name returned by SSL_get0_group_name is in lowercase whereas the curve - * name returned when we use `SSL_get_negotiated_group` and `OBJ_nid2sn` is the - * short name and is in upper case. To make the return value consistent across the - * different functional calls and to make it consistent while upgrading OpenSSL versions, - * will convert the curve name returned by SSL_get0_group_name to upper case. - */ - int i; - - for (i = 0; curve_name[i]; i++) - curve_name[i] = toupper(curve_name[i]); - } - #else - nid = SSL_get_negotiated_group(ssl); - if (!nid) - return 0; - curve_name = (char *)OBJ_nid2sn(nid); - if (curve_name == NULL) - return 0; - #endif - - smp->data.u.str.area = curve_name; - if (!smp->data.u.str.area) - return 0; - - smp->data.type = SMP_T_STR; - smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST; - smp->data.u.str.data = strlen(smp->data.u.str.area); - - return 1; + struct connection *conn; + SSL *ssl; + int __maybe_unused nid; + char *curve_name; + + if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) + conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL; + else + conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) : + smp->strm ? sc_conn(smp->strm->scb) : NULL; + + ssl = ssl_sock_get_ssl_object(conn); + if (!ssl) + return 0; + + /* + * SSL_get0_group_name is a function to get the curve name and is available from + * OpenSSL v3.2 onwards. For OpenSSL >=3.0 and <3.2, we will continue to use + * SSL_get_negotiated_group to get the curve name. 
+ */ +# if (HA_OPENSSL_VERSION_NUMBER >= 0x3020000fL) + curve_name = (char *)SSL_get0_group_name(ssl); + if (curve_name == NULL) { + return 0; + } else { + /* + * The curve name returned by SSL_get0_group_name is in lowercase whereas the curve + * name returned when we use `SSL_get_negotiated_group` and `OBJ_nid2sn` is the + * short name and is in upper case. To make the return value consistent across the + * different functional calls and to make it consistent while upgrading OpenSSL versions, + * will convert the curve name returned by SSL_get0_group_name to upper case. + */ + int i; + + for (i = 0; curve_name[i]; i++) + curve_name[i] = toupper(curve_name[i]); + } +# else + nid = SSL_get_negotiated_group(ssl); + if (!nid) + return 0; + curve_name = (char *)OBJ_nid2sn(nid); + if (curve_name == NULL) + return 0; +# endif + + smp->data.u.str.area = curve_name; + if (!smp->data.u.str.area) + return 0; + + smp->data.type = SMP_T_STR; + smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST; + smp->data.u.str.data = strlen(smp->data.u.str.area); + + return 1; } #endif @@ -2263,6 +2319,15 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "ssl_bc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV }, { "ssl_bc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV }, #endif +#ifdef HAVE_SSL_KEYLOG + { "ssl_bc_client_early_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_client_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_server_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_client_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_server_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { 
"ssl_bc_early_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, +#endif { "ssl_bc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "ssl_bc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, { "ssl_c_ca_err", smp_fetch_ssl_c_ca_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, @@ -2367,7 +2432,8 @@ INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords); static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "sha2", sample_conv_sha2, ARG1(0, SINT), smp_check_sha2, SMP_T_BIN, SMP_T_BIN }, #ifdef EVP_CIPH_GCM_MODE - { "aes_gcm_dec", sample_conv_aes_gcm_dec, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, + { "aes_gcm_enc", sample_conv_aes_gcm, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, + { "aes_gcm_dec", sample_conv_aes_gcm, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, #endif { "x509_v_err_str", sample_conv_x509_v_err, 0, NULL, SMP_T_SINT, SMP_T_STR }, { "digest", sample_conv_crypto_digest, ARG1(1,STR), check_crypto_digest, SMP_T_BIN, SMP_T_BIN }, diff --git a/src/ssl_sock.c b/src/ssl_sock.c index 96d826e..e6bf3ff 100644 --- a/src/ssl_sock.c +++ b/src/ssl_sock.c @@ -72,6 +72,7 @@ #include <haproxy/shctx.h> #include <haproxy/ssl_ckch.h> #include <haproxy/ssl_crtlist.h> +#include <haproxy/ssl_gencert.h> #include <haproxy/ssl_sock.h> #include <haproxy/ssl_utils.h> #include <haproxy/stats.h> @@ -135,9 +136,12 @@ struct global_ssl global_ssl = { #ifdef HAVE_SSL_KEYLOG .keylog = 0, #endif + .security_level = -1, #ifndef OPENSSL_NO_OCSP .ocsp_update.delay_max = SSL_OCSP_UPDATE_DELAY_MAX, .ocsp_update.delay_min = SSL_OCSP_UPDATE_DELAY_MIN, + .ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_OFF, + .ocsp_update.disable = 0, #endif }; @@ -156,7 +160,7 @@ enum { SSL_ST_STATS_COUNT /* must be the last member of the enum */ }; -static struct name_desc ssl_stats[] = { +static struct stat_col ssl_stats[] = { [SSL_ST_SESS] = { .name = "ssl_sess", 
.desc = "Total number of ssl sessions established" }, [SSL_ST_REUSED_SESS] = { .name = "ssl_reused_sess", @@ -171,13 +175,37 @@ static struct ssl_counters { long long failed_handshake; } ssl_counters; -static void ssl_fill_stats(void *data, struct field *stats) +static int ssl_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct ssl_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? *selected_field : 0); - stats[SSL_ST_SESS] = mkf_u64(FN_COUNTER, counters->sess); - stats[SSL_ST_REUSED_SESS] = mkf_u64(FN_COUNTER, counters->reused_sess); - stats[SSL_ST_FAILED_HANDSHAKE] = mkf_u64(FN_COUNTER, counters->failed_handshake); + for (; current_field < SSL_ST_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case SSL_ST_SESS: + metric = mkf_u64(FN_COUNTER, counters->sess); + break; + case SSL_ST_REUSED_SESS: + metric = mkf_u64(FN_COUNTER, counters->reused_sess); + break; + case SSL_ST_FAILED_HANDSHAKE: + metric = mkf_u64(FN_COUNTER, counters->failed_handshake); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module ssl_stats_module = { @@ -504,38 +532,8 @@ static HASSL_DH *global_dh = NULL; static HASSL_DH *local_dh_1024 = NULL; static HASSL_DH *local_dh_2048 = NULL; static HASSL_DH *local_dh_4096 = NULL; -#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) -static DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen); -#else -static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey); -#endif #endif /* OPENSSL_NO_DH */ -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) -/* X509V3 Extensions that will be added on generated certificates */ -#define X509V3_EXT_SIZE 5 -static char *x509v3_ext_names[X509V3_EXT_SIZE] = { - "basicConstraints", - "nsComment", - "subjectKeyIdentifier", - "authorityKeyIdentifier", - "keyUsage", -}; -static char *x509v3_ext_values[X509V3_EXT_SIZE] = { - "CA:FALSE", - "\"OpenSSL Generated Certificate\"", - "hash", - "keyid,issuer:always", - "nonRepudiation,digitalSignature,keyEncipherment" -}; -/* LRU cache to store generated certificate */ -static struct lru64_head *ssl_ctx_lru_tree = NULL; -static unsigned int ssl_ctx_lru_seed = 0; -static unsigned int ssl_ctx_serial; -__decl_rwlock(ssl_ctx_lru_rwlock); - -#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME - /* The order here matters for picking a default context, * keep the most common keytype at the bottom of the list */ @@ -1109,40 +1107,40 @@ static int tlskeys_finalize_config(void) * Returns 1 if no ".ocsp" file found, 0 if OCSP status extension is * successfully enabled, or -1 in other error case. 
*/ -static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data *data, STACK_OF(X509) *chain) +static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_store *store, STACK_OF(X509) *chain) { + struct ckch_data *data = store->data; X509 *x, *issuer; int i, ret = -1; struct certificate_ocsp *ocsp = NULL, *iocsp; char *warn = NULL; unsigned char *p; -#ifndef USE_OPENSSL_WOLFSSL -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) - int (*callback) (SSL *, void *); -#else - void (*callback) (void); -#endif +#ifdef USE_OPENSSL_WOLFSSL + /* typedef int(*tlsextStatusCb)(WOLFSSL* ssl, void*); */ + tlsextStatusCb callback = NULL; +#elif (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) + int (*callback) (SSL *, void *) = NULL; #else - tlsextStatusCb callback; + void (*callback) (void) = NULL; #endif struct buffer *ocsp_uri = get_trash_chunk(); char *err = NULL; size_t path_len; + int inc_refcount_store = 0; + int enable_auto_update = (store->conf.ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) || + (store->conf.ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_DFLT && + global_ssl.ocsp_update.mode == SSL_SOCK_OCSP_UPDATE_ON); x = data->cert; if (!x) goto out; ssl_ocsp_get_uri_from_cert(x, ocsp_uri, &err); - /* We should have an "OCSP URI" field in order for auto update to work. */ - if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON && b_data(ocsp_uri) == 0) - goto out; - - /* In case of ocsp update mode set to 'on', this function might be - * called with no known ocsp response. If no ocsp uri can be found in - * the certificate, nothing needs to be done here. */ if (!data->ocsp_response && !data->ocsp_cid) { - if (data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_ON || b_data(ocsp_uri) == 0) { + /* In case of ocsp update mode set to 'on', this function might + * be called with no known ocsp response. If no ocsp uri can be + * found in the certificate, nothing needs to be done here. 
*/ + if (!enable_auto_update || b_data(ocsp_uri) == 0) { ret = 0; goto out; } @@ -1163,8 +1161,10 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (!issuer) goto out; - if (!data->ocsp_cid) + if (!data->ocsp_cid) { data->ocsp_cid = OCSP_cert_to_id(0, x, issuer); + inc_refcount_store = 1; + } if (!data->ocsp_cid) goto out; @@ -1185,12 +1185,11 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (iocsp == ocsp) ocsp = NULL; -#ifndef SSL_CTX_get_tlsext_status_cb -# define SSL_CTX_get_tlsext_status_cb(ctx, cb) \ - *cb = (void (*) (void))ctx->tlsext_status_cb; -#endif SSL_CTX_get_tlsext_status_cb(ctx, &callback); + if (inc_refcount_store) + iocsp->refcount_store++; + if (!callback) { struct ocsp_cbk_arg *cb_arg; EVP_PKEY *pkey; @@ -1282,7 +1281,7 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * */ memcpy(iocsp->path, path, path_len + 1); - if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + if (enable_auto_update) { ssl_ocsp_update_insert(iocsp); /* If we are during init the update task is not * scheduled yet so a wakeup won't do anything. @@ -1294,7 +1293,7 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (ocsp_update_task) task_wakeup(ocsp_update_task, TASK_WOKEN_MSG); } - } else if (iocsp->uri && data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + } else if (iocsp->uri && enable_auto_update) { /* This unlikely case can happen if a series of "del ssl * crt-list" / "add ssl crt-list" commands are made on the CLI. * In such a case, the OCSP response tree entry will be created @@ -1910,342 +1909,6 @@ static int ssl_sock_advertise_alpn_protos(SSL *s, const unsigned char **out, } #endif -#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME -#ifndef SSL_NO_GENERATE_CERTIFICATES - -/* Configure a DNS SAN extension on a certificate. 
*/ -int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) { - int failure = 0; - X509_EXTENSION *san_ext = NULL; - CONF *conf = NULL; - struct buffer *san_name = get_trash_chunk(); - - conf = NCONF_new(NULL); - if (!conf) { - failure = 1; - goto cleanup; - } - - /* Build an extension based on the DNS entry above */ - chunk_appendf(san_name, "DNS:%s", servername); - san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area); - if (!san_ext) { - failure = 1; - goto cleanup; - } - - /* Add the extension */ - if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) { - failure = 1; - goto cleanup; - } - - /* Success */ - failure = 0; - -cleanup: - if (NULL != san_ext) X509_EXTENSION_free(san_ext); - if (NULL != conf) NCONF_free(conf); - - return failure; -} - -/* Create a X509 certificate with the specified servername and serial. This - * function returns a SSL_CTX object or NULL if an error occurs. */ -static SSL_CTX * -ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl) -{ - X509 *cacert = bind_conf->ca_sign_ckch->cert; - EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key; - SSL_CTX *ssl_ctx = NULL; - X509 *newcrt = NULL; - EVP_PKEY *pkey = NULL; - SSL *tmp_ssl = NULL; - CONF *ctmp = NULL; - X509_NAME *name; - const EVP_MD *digest; - X509V3_CTX ctx; - unsigned int i; - int key_type; - - /* Get the private key of the default certificate and use it */ -#ifdef HAVE_SSL_CTX_get0_privatekey - pkey = SSL_CTX_get0_privatekey(bind_conf->default_ctx); -#else - tmp_ssl = SSL_new(bind_conf->default_ctx); - if (tmp_ssl) - pkey = SSL_get_privatekey(tmp_ssl); -#endif - if (!pkey) - goto mkcert_error; - - /* Create the certificate */ - if (!(newcrt = X509_new())) - goto mkcert_error; - - /* Set version number for the certificate (X509v3) and the serial - * number */ - if (X509_set_version(newcrt, 2L) != 1) - goto mkcert_error; - ASN1_INTEGER_set(X509_get_serialNumber(newcrt), 
_HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1)); - - /* Set duration for the certificate */ - if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) || - !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365)) - goto mkcert_error; - - /* set public key in the certificate */ - if (X509_set_pubkey(newcrt, pkey) != 1) - goto mkcert_error; - - /* Set issuer name from the CA */ - if (!(name = X509_get_subject_name(cacert))) - goto mkcert_error; - if (X509_set_issuer_name(newcrt, name) != 1) - goto mkcert_error; - - /* Set the subject name using the same, but the CN */ - name = X509_NAME_dup(name); - if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, - (const unsigned char *)servername, - -1, -1, 0) != 1) { - X509_NAME_free(name); - goto mkcert_error; - } - if (X509_set_subject_name(newcrt, name) != 1) { - X509_NAME_free(name); - goto mkcert_error; - } - X509_NAME_free(name); - - /* Add x509v3 extensions as specified */ - ctmp = NCONF_new(NULL); - X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0); - for (i = 0; i < X509V3_EXT_SIZE; i++) { - X509_EXTENSION *ext; - - if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i]))) - goto mkcert_error; - if (!X509_add_ext(newcrt, ext, -1)) { - X509_EXTENSION_free(ext); - goto mkcert_error; - } - X509_EXTENSION_free(ext); - } - - /* Add SAN extension */ - if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) { - goto mkcert_error; - } - - /* Sign the certificate with the CA private key */ - - key_type = EVP_PKEY_base_id(capkey); - - if (key_type == EVP_PKEY_DSA) - digest = EVP_sha1(); - else if (key_type == EVP_PKEY_RSA) - digest = EVP_sha256(); - else if (key_type == EVP_PKEY_EC) - digest = EVP_sha256(); - else { -#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID - int nid; - - if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0) - goto mkcert_error; - if (!(digest = EVP_get_digestbynid(nid))) - goto mkcert_error; -#else - goto mkcert_error; -#endif - } - - if (!(X509_sign(newcrt, capkey, 
digest))) - goto mkcert_error; - - /* Create and set the new SSL_CTX */ - if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method()))) - goto mkcert_error; - if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey)) - goto mkcert_error; - if (!SSL_CTX_use_certificate(ssl_ctx, newcrt)) - goto mkcert_error; - if (!SSL_CTX_check_private_key(ssl_ctx)) - goto mkcert_error; - - /* Build chaining the CA cert and the rest of the chain, keep these order */ -#if defined(SSL_CTX_add1_chain_cert) - if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) { - goto mkcert_error; - } - - if (bind_conf->ca_sign_ckch->chain) { - for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) { - X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i); - if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) { - goto mkcert_error; - } - } - } -#endif - - if (newcrt) X509_free(newcrt); - -#ifndef OPENSSL_NO_DH -#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) - SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk); -#else - ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey); -#endif -#endif - -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) -#if defined(SSL_CTX_set1_curves_list) - { - const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); - if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe)) - goto end; - } -#endif -#else -#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) - { - const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? 
bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); - EC_KEY *ecc; - int nid; - - if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef) - goto end; - if (!(ecc = EC_KEY_new_by_curve_name(nid))) - goto end; - SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc); - EC_KEY_free(ecc); - } -#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */ -#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */ - end: - return ssl_ctx; - - mkcert_error: - if (ctmp) NCONF_free(ctmp); - if (tmp_ssl) SSL_free(tmp_ssl); - if (ssl_ctx) SSL_CTX_free(ssl_ctx); - if (newcrt) X509_free(newcrt); - return NULL; -} - - -/* Do a lookup for a certificate in the LRU cache used to store generated - * certificates and immediately assign it to the SSL session if not null. */ -SSL_CTX * -ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl) -{ - struct lru64 *lru = NULL; - - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); - if (lru && lru->domain) { - if (ssl) - SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return (SSL_CTX *)lru->data; - } - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - } - return NULL; -} - -/* Same as <ssl_sock_assign_generated_cert> but without SSL session. This - * function is not thread-safe, it should only be used to check if a certificate - * exists in the lru cache (with no warranty it will not be removed by another - * thread). It is kept for backward compatibility. */ -SSL_CTX * -ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf) -{ - return ssl_sock_assign_generated_cert(key, bind_conf, NULL); -} - -/* Set a certificate int the LRU cache used to store generated - * certificate. 
Return 0 on success, otherwise -1 */ -int -ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf) -{ - struct lru64 *lru = NULL; - - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); - if (!lru) { - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return -1; - } - if (lru->domain && lru->data) - lru->free((SSL_CTX *)lru->data); - lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return 0; - } - return -1; -} - -/* Compute the key of the certificate. */ -unsigned int -ssl_sock_generated_cert_key(const void *data, size_t len) -{ - return XXH32(data, len, ssl_ctx_lru_seed); -} - -/* Generate a cert and immediately assign it to the SSL session so that the cert's - * refcount is maintained regardless of the cert's presence in the LRU cache. 
- */ -static int -ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl) -{ - X509 *cacert = bind_conf->ca_sign_ckch->cert; - SSL_CTX *ssl_ctx = NULL; - struct lru64 *lru = NULL; - unsigned int key; - - key = ssl_sock_generated_cert_key(servername, strlen(servername)); - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0); - if (lru && lru->domain) - ssl_ctx = (SSL_CTX *)lru->data; - if (!ssl_ctx && lru) { - ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); - lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free); - } - SSL_set_SSL_CTX(ssl, ssl_ctx); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return 1; - } - else { - ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); - SSL_set_SSL_CTX(ssl, ssl_ctx); - /* No LRU cache, this CTX will be released as soon as the session dies */ - SSL_CTX_free(ssl_ctx); - return 1; - } - return 0; -} -static int -ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl) -{ - unsigned int key; - struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index); - - if (conn_get_dst(conn)) { - key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst)); - if (ssl_sock_assign_generated_cert(key, bind_conf, ssl)) - return 1; - } - return 0; -} -#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */ - #if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c) @@ -2351,7 +2014,7 @@ static void ssl_sock_switchctx_set(SSL *ssl, SSL_CTX *ctx) * * This function does a lookup in the bind_conf sni tree so the caller should lock its tree. 
*/ -static __maybe_unused struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername, +struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername, int have_rsa_sig, int have_ecdsa_sig) { struct ebmb_node *node, *n, *node_ecdsa = NULL, *node_rsa = NULL, *node_anonymous = NULL; @@ -2365,6 +2028,9 @@ static __maybe_unused struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s break; } } + /* if the servername is empty look for the default in the wildcard list */ + if (!*servername) + wildp = servername; /* Look for an ECDSA, RSA and DSA certificate, first in the single * name and if not found in the wildcard */ @@ -2463,7 +2129,8 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) int has_rsa_sig = 0, has_ecdsa_sig = 0; struct sni_ctx *sni_ctx; const char *servername; - size_t servername_len; + size_t servername_len = 0; + int default_lookup = 0; /* did we lookup for a default yet? */ int allow_early = 0; int i; @@ -2551,14 +2218,16 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) goto allow_early; } #endif - /* without SNI extension, is the default_ctx (need SSL_TLSEXT_ERR_NOACK) */ - if (!s->strict_sni) { - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; - } - goto abort; + + /* no servername field is not compatible with strict-sni */ + if (s->strict_sni) + goto abort; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + servername_len = 0; + default_lookup = 1; } /* extract/check clientHello information */ @@ -2634,14 +2303,14 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) } } +sni_lookup: /* we need to transform this a NULL-ended string in lowecase */ for (i = 0; i < trash.size && i < servername_len; i++) trash.area[i] = tolower(servername[i]); trash.area[i] = 0; - servername = trash.area; 
HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - sni_ctx = ssl_sock_chose_sni_ctx(s, servername, has_rsa_sig, has_ecdsa_sig); + sni_ctx = ssl_sock_chose_sni_ctx(s, trash.area, has_rsa_sig, has_ecdsa_sig); if (sni_ctx) { /* switch ctx */ struct ssl_bind_conf *conf = sni_ctx->conf; @@ -2658,17 +2327,20 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); #if (!defined SSL_NO_GENERATE_CERTIFICATES) - if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(servername, s, ssl)) { + if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(trash.area, s, ssl)) { /* switch ctx done in ssl_sock_generate_certificate */ goto allow_early; } #endif - if (!s->strict_sni) { - /* no certificate match, is the default_ctx */ - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; + + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + servername_len = 0; + default_lookup = 1; + + goto sni_lookup; } /* We are about to raise an handshake error so the servername extension @@ -2722,6 +2394,7 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) const char *wildp = NULL; struct ebmb_node *node, *n; struct bind_conf *s = priv; + int default_lookup = 0; /* did we lookup for a default yet? 
*/ #ifdef USE_QUIC const uint8_t *extension_data; size_t extension_len; @@ -2761,12 +2434,15 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) #endif if (s->strict_sni) return SSL_TLSEXT_ERR_ALERT_FATAL; - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_NOACK; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + default_lookup = 1; } +sni_lookup: + for (i = 0; i < trash.size; i++) { if (!servername[i]) break; @@ -2775,6 +2451,8 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) wildp = &trash.area[i]; } trash.area[i] = 0; + if(!*trash.area) /* handle the default which in wildcard tree */ + wildp = trash.area; HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); node = NULL; @@ -2804,24 +2482,35 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) return SSL_TLSEXT_ERR_OK; } #endif - if (s->strict_sni) { - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_ALERT_FATAL; - } - ssl_sock_switchctx_set(ssl, s->default_ctx); HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_OK; + + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + default_lookup = 1; + + goto sni_lookup; + } + return SSL_TLSEXT_ERR_ALERT_FATAL; } +#if defined(OPENSSL_IS_AWSLC) + /* Note that ssl_sock_switchctx_set() calls SSL_set_SSL_CTX() which propagates the + * "early data enabled" setting from the SSL_CTX object to the SSL objects. + * So enable early data for this SSL_CTX context if configured. 
+ */ + if (s->ssl_conf.early_data) + SSL_CTX_set_early_data_enabled(container_of(node, struct sni_ctx, name)->ctx, 1); +#endif /* switch ctx */ ssl_sock_switchctx_set(ssl, container_of(node, struct sni_ctx, name)->ctx); HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); return SSL_TLSEXT_ERR_OK; } #endif /* (!) OPENSSL_IS_BORINGSSL */ -#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */ -#if 0 && defined(USE_OPENSSL_WOLFSSL) +#if defined(USE_OPENSSL_WOLFSSL) /* This implement the equivalent of the clientHello Callback but using the cert_cb. * WolfSSL is able to extract the sigalgs and ciphers of the client byt using the API * provided in https://github.com/wolfSSL/wolfssl/pull/6963 @@ -2833,6 +2522,7 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) struct bind_conf *s = arg; int has_rsa_sig = 0, has_ecdsa_sig = 0; const char *servername; + int default_lookup = 0; struct sni_ctx *sni_ctx; int i; @@ -2844,14 +2534,13 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) servername = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name); if (!servername) { - /* without SNI extension, is the default_ctx (need SSL_TLSEXT_ERR_NOACK) */ - if (!s->strict_sni) { - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; - } - goto abort; + if (s->strict_sni) + goto abort; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + default_lookup = 1; } /* extract sigalgs and ciphers */ @@ -2895,6 +2584,8 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) } } +sni_lookup: + /* we need to transform this into a NULL-ended string in lowecase */ for (i = 0; i < trash.size && servername[i] != '\0'; i++) trash.area[i] = tolower(servername[i]); @@ -2916,12 +2607,13 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) } HA_RWLOCK_RDUNLOCK(SNI_LOCK, 
&s->sni_lock); - if (!s->strict_sni) { - /* no certificate match, is the default_ctx */ - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + default_lookup = 1; + + goto sni_lookup; } /* We are about to raise an handshake error so the servername extension @@ -3224,7 +2916,7 @@ static HASSL_DH *ssl_get_tmp_dh(EVP_PKEY *pkey) #if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) /* Returns Diffie-Hellman parameters matching the private key length but not exceeding global_ssl.default_dh_param */ -static HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen) +HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen) { EVP_PKEY *pkey = SSL_get_privatekey(ssl); @@ -3250,7 +2942,7 @@ static int ssl_sock_set_tmp_dh(SSL_CTX *ctx, HASSL_DH *dh) } #if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL) -static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey) +void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey) { HASSL_DH *dh = NULL; if (pkey && (dh = ssl_get_tmp_dh(pkey))) { @@ -3335,7 +3027,7 @@ static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst, struct pkey_info kinfo, char *name, int order) { struct sni_ctx *sc; - int wild = 0, neg = 0; + int wild = 0, neg = 0, default_crt = 0; if (*name == '!') { neg = 1; @@ -3344,11 +3036,14 @@ static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst, if (*name == '*') { wild = 1; name++; + /* if this was only a '*' filter, this is a default cert */ + if (!*name) + default_crt = 1; } /* !* filter is a nop */ if (neg && wild) return order; - if (*name) { + if (*name || default_crt) { int j, len; len = strlen(name); for (j = 0; j < len && j < trash.size; j++) @@ -3420,14 +3115,6 @@ void 
ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_ else ebst_insert(&bind_conf->sni_ctx, &sc0->name); } - - /* replace the default_ctx if required with the instance's ctx. */ - if (ckch_inst->is_default) { - SSL_CTX_free(bind_conf->default_ctx); - SSL_CTX_up_ref(ckch_inst->ctx); - bind_conf->default_ctx = ckch_inst->ctx; - bind_conf->default_inst = ckch_inst; - } } /* @@ -3625,9 +3312,10 @@ end: * The value 0 means there is no error nor warning and * the operation succeed. */ -static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_data *data, SSL_CTX *ctx, char **err) +static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_store *store, SSL_CTX *ctx, char **err) { int errcode = 0; + struct ckch_data *data = store->data; STACK_OF(X509) *find_chain = NULL; ERR_clear_error(); @@ -3679,7 +3367,7 @@ static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_data *data, * ocsp tree even if no ocsp_response was known during init, unless the * frontend's conf disables ocsp update explicitly. */ - if (ssl_sock_load_ocsp(path, ctx, data, find_chain) < 0) { + if (ssl_sock_load_ocsp(path, ctx, store, find_chain) < 0) { if (data->ocsp_response) memprintf(err, "%s '%s.ocsp' is present and activates OCSP but it is impossible to compute the OCSP certificate ID (maybe the issuer could not be found)'.\n", err && *err ? 
*err : "", path); @@ -3744,7 +3432,7 @@ end: * ERR_WARN if a warning is available into err */ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, - struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err) + struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, int is_default, struct ckch_inst **ckchi, char **err) { SSL_CTX *ctx; int i; @@ -3775,7 +3463,10 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct goto error; } - errcode |= ssl_sock_put_ckch_into_ctx(path, data, ctx, err); + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + + errcode |= ssl_sock_put_ckch_into_ctx(path, ckchs, ctx, err); if (errcode & ERR_CODE) goto error; @@ -3857,20 +3548,16 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct * the tree, so it will be discovered and cleaned in time. */ -#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME - if (bind_conf->default_ctx) { - memprintf(err, "%sthis version of openssl cannot load multiple SSL certificates.\n", - err && *err ? *err : ""); - errcode |= ERR_ALERT | ERR_FATAL; - goto error; - } -#endif - if (!bind_conf->default_ctx) { - bind_conf->default_ctx = ctx; - bind_conf->default_ssl_conf = ssl_conf; + if (is_default) { ckch_inst->is_default = 1; - SSL_CTX_up_ref(ctx); - bind_conf->default_inst = ckch_inst; + + /* insert an empty SNI which will be used to lookup default certificate */ + order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, "*", order); + if (order < 0) { + memprintf(err, "%sunable to create a sni context.\n", err && *err ? 
*err : ""); + errcode |= ERR_ALERT | ERR_FATAL; + goto error; + } } /* Always keep a reference to the newly constructed SSL_CTX in the @@ -3892,9 +3579,6 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct error: /* free the allocated sni_ctxs */ if (ckch_inst) { - if (ckch_inst->is_default) - SSL_CTX_free(ctx); - ckch_inst_free(ckch_inst); ckch_inst = NULL; } @@ -3936,6 +3620,9 @@ int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs, goto error; } + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + errcode |= ssl_sock_put_srv_ckch_into_ctx(path, data, ctx, err); if (errcode & ERR_CODE) goto error; @@ -3967,12 +3654,14 @@ error: /* Returns a set of ERR_* flags possibly with an error in <err>. */ static int ssl_sock_load_ckchs(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf, - char **sni_filter, int fcount, struct ckch_inst **ckch_inst, char **err) + char **sni_filter, int fcount, + int is_default, + struct ckch_inst **ckch_inst, char **err) { int errcode = 0; /* we found the ckchs in the tree, we can use it directly */ - errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, ckch_inst, err); + errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, is_default, ckch_inst, err); if (errcode & ERR_CODE) return errcode; @@ -4081,9 +3770,17 @@ int ssl_sock_load_cert_list_file(char *file, int dir, struct bind_conf *bind_con list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) { struct ckch_store *store; struct ckch_inst *ckch_inst = NULL; + int is_default = 0; store = entry->node.key; - cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &ckch_inst, err); + + /* if the SNI trees were empty the first "crt" become a default certificate, + * it can be applied on multiple certificates 
if it's a bundle */ + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) + is_default = 1; + + + cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, is_default, &ckch_inst, err); if (cfgerr & ERR_CODE) { memprintf(err, "error processing line %d in file '%s' : %s", entry->linenum, file, *err); goto error; @@ -4125,7 +3822,7 @@ error: } /* Returns a set of ERR_* flags possibly with an error in <err>. */ -int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) +int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, int is_default, char **err) { struct stat buf; int cfgerr = 0; @@ -4133,25 +3830,32 @@ int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) struct ckch_inst *ckch_inst = NULL; int found = 0; /* did we found a file to load ? */ + /* if the SNI trees were empty the first "crt" become a default certificate, + * it can be applied on multiple certificates if it's a bundle */ + if (is_default == 0) { + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) + is_default = 1; + } + if ((ckchs = ckchs_lookup(path))) { - /* we found the ckchs in the tree, we can use it directly */ - cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); - /* This certificate has an 'ocsp-update' already set in a - * previous crt-list so we must raise an error. */ - if (ckchs->data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { - memprintf(err, "%sIncompatibilities found in OCSP update mode for certificate %s\n", err && *err ? *err: "", path); - cfgerr |= ERR_ALERT | ERR_FATAL; - } + cfgerr |= ckch_conf_cmp_empty(&ckchs->conf, err); + if (cfgerr & ERR_CODE) { + memprintf(err, "Can't load '%s', is already defined with incompatible parameters:\n %s", path, err ? 
*err : ""); + return cfgerr; + } + + /* we found the ckchs in the tree, we can use it directly */ + cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); found++; } else if (stat(path, &buf) == 0) { found++; if (S_ISDIR(buf.st_mode) == 0) { - ckchs = ckchs_load_cert_file(path, err); + ckchs = ckch_store_new_load_files_path(path, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; - cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); } else { cfgerr |= ssl_sock_load_cert_list_file(path, 1, bind_conf, bind_conf->frontend, err); } @@ -4171,15 +3875,15 @@ int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) continue; if ((ckchs = ckchs_lookup(fp))) { - cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); found++; } else { if (stat(fp, &buf) == 0) { found++; - ckchs = ckchs_load_cert_file(fp, err); + ckchs = ckch_store_new_load_files_path(fp, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; - cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); } } } @@ -4229,7 +3933,7 @@ int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none /* We do not manage directories on backend side. 
*/ if (S_ISDIR(buf.st_mode) == 0) { ++found; - ckchs = ckchs_load_cert_file(path, err); + ckchs = ckch_store_new_load_files_path(path, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err); @@ -4274,6 +3978,9 @@ ssl_sock_initial_ctx(struct bind_conf *bind_conf) ctx = SSL_CTX_new(SSLv23_server_method()); bind_conf->initial_ctx = ctx; + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max)) ha_warning("Proxy '%s': no-sslv3/no-tlsv1x are ignored for bind '%s' at [%s:%d]. " "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n", @@ -4384,7 +4091,7 @@ ssl_sock_initial_ctx(struct bind_conf *bind_conf) # endif /* ! SSL_OP_NO_ANTI_REPLAY */ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL); SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk); -# elif 0 && defined(USE_OPENSSL_WOLFSSL) +# elif defined(USE_OPENSSL_WOLFSSL) SSL_CTX_set_cert_cb(ctx, ssl_sock_switchctx_wolfSSL_cbk, bind_conf); # else /* ! OPENSSL_IS_BORINGSSL && ! 
HAVE_SSL_CLIENT_HELLO_CB */ @@ -5270,6 +4977,8 @@ int ssl_sock_prepare_srv_ctx(struct server *srv) cfgerr++; return cfgerr; } + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); srv->ssl_ctx.ctx = ctx; } @@ -5429,6 +5138,16 @@ static int ssl_sock_prepare_srv_ssl_ctx(const struct server *srv, SSL_CTX *ctx) cfgerr++; } +#ifdef SSL_CTRL_SET_MSG_CALLBACK + SSL_CTX_set_msg_callback(ctx, ssl_sock_msgcbk); +#endif + +#ifdef HAVE_SSL_KEYLOG + /* only activate the keylog callback if it was required to prevent performance loss */ + if (global_ssl.keylog > 0) + SSL_CTX_set_keylog_callback(ctx, SSL_CTX_keylog); +#endif + #ifdef HAVE_SSL_CTX_SET_CIPHERSUITES if (srv->ssl_ctx.ciphersuites && !SSL_CTX_set_ciphersuites(ctx, srv->ssl_ctx.ciphersuites)) { @@ -5547,16 +5266,12 @@ int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf) to initial_ctx in ssl_initial_ctx. */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, NULL, bind_conf->initial_ctx, NULL, &errmsg); } - if (bind_conf->default_ctx) { - errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, bind_conf->default_ssl_conf, bind_conf->default_ctx, bind_conf->default_inst, &errmsg); - } node = ebmb_first(&bind_conf->sni_ctx); while (node) { sni = ebmb_entry(node, struct sni_ctx, name); - if (!sni->order && sni->ctx != bind_conf->default_ctx) { - /* only initialize the CTX on its first occurrence and - if it is not the default_ctx */ + if (!sni->order) { + /* only initialize the CTX on its first occurrence */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg); } node = ebmb_next(node); @@ -5565,9 +5280,8 @@ int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf) node = ebmb_first(&bind_conf->sni_w_ctx); while (node) { sni = ebmb_entry(node, struct sni_ctx, name); - if (!sni->order && sni->ctx != bind_conf->default_ctx) { - /* only initialize the CTX on its first occurrence and - if it is not the default_ctx */ + if (!sni->order) { + /* 
only initialize the CTX on its first occurrence */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg); } node = ebmb_next(node); @@ -5594,14 +5308,17 @@ int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf) int alloc_ctx; int err; + /* check if some certificates were loaded but no ssl keyword is used */ if (!(bind_conf->options & BC_O_USE_SSL)) { - if (bind_conf->default_ctx) { + if (!eb_is_empty(&bind_conf->sni_ctx) || !eb_is_empty(&bind_conf->sni_w_ctx)) { ha_warning("Proxy '%s': A certificate was specified but SSL was not enabled on bind '%s' at [%s:%d] (use 'ssl').\n", px->id, bind_conf->arg, bind_conf->file, bind_conf->line); } return 0; } - if (!bind_conf->default_ctx) { + + /* check if we have certificates */ + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) { if (bind_conf->strict_sni && !(bind_conf->options & BC_O_GENERATE_CERTS)) { ha_warning("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d], ssl connections will fail (use 'crt').\n", px->id, bind_conf->arg, bind_conf->file, bind_conf->line); @@ -5612,10 +5329,23 @@ int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf) return -1; } } + + if ((bind_conf->options & BC_O_GENERATE_CERTS)) { + struct sni_ctx *sni_ctx; + + /* if we use the generate-certificates option, look for the first default cert available */ + sni_ctx = ssl_sock_chose_sni_ctx(bind_conf, "", 1, 1); + if (!sni_ctx) { + ha_alert("Proxy '%s': no SSL certificate specified for bind '%s' and 'generate-certificates' option at [%s:%d] (use 'crt').\n", + px->id, bind_conf->arg, bind_conf->file, bind_conf->line); + return -1; + } + } + if (!ssl_shctx && global.tune.sslcachesize) { alloc_ctx = shctx_init(&ssl_shctx, global.tune.sslcachesize, sizeof(struct sh_ssl_sess_hdr) + SHSESS_BLOCK_MIN_SIZE, -1, - sizeof(*sh_ssl_sess_tree)); + sizeof(*sh_ssl_sess_tree), "ssl cache"); if (alloc_ctx <= 0) { if (alloc_ctx == SHCTX_E_INIT_LOCK) ha_alert("Unable to 
initialize the lock for the shared SSL session cache. You can retry using the global statement 'tune.ssl.force-private-cache' but it could increase CPU usage due to renegotiations if nbproc > 1.\n"); @@ -5713,10 +5443,6 @@ void ssl_sock_free_all_ctx(struct bind_conf *bind_conf) SSL_CTX_free(bind_conf->initial_ctx); bind_conf->initial_ctx = NULL; - SSL_CTX_free(bind_conf->default_ctx); - bind_conf->default_ctx = NULL; - bind_conf->default_inst = NULL; - bind_conf->default_ssl_conf = NULL; } @@ -5746,81 +5472,6 @@ void ssl_sock_destroy_bind_conf(struct bind_conf *bind_conf) bind_conf->ca_sign_file = NULL; } -/* Load CA cert file and private key used to generate certificates */ -int -ssl_sock_load_ca(struct bind_conf *bind_conf) -{ - struct proxy *px = bind_conf->frontend; - struct ckch_data *data = NULL; - int ret = 0; - char *err = NULL; - - if (!(bind_conf->options & BC_O_GENERATE_CERTS)) - return ret; - -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) - if (global_ssl.ctx_cache) { - ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache); - } - ssl_ctx_lru_seed = (unsigned int)time(NULL); - ssl_ctx_serial = now_ms; -#endif - - if (!bind_conf->ca_sign_file) { - ha_alert("Proxy '%s': cannot enable certificate generation, " - "no CA certificate File configured at [%s:%d].\n", - px->id, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Allocate cert structure */ - data = calloc(1, sizeof(*data)); - if (!data) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Try to parse file */ - if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, data, &err)) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. 
Chain loading failed: %s\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err); - free(err); - goto failed; - } - - /* Fail if missing cert or pkey */ - if ((!data->cert) || (!data->key)) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Final assignment to bind */ - bind_conf->ca_sign_ckch = data; - return ret; - - failed: - if (data) { - ssl_sock_free_cert_key_and_chain_contents(data); - free(data); - } - - bind_conf->options &= ~BC_O_GENERATE_CERTS; - ret++; - return ret; -} - -/* Release CA cert and private key used to generate certificated */ -void -ssl_sock_free_ca(struct bind_conf *bind_conf) -{ - if (bind_conf->ca_sign_ckch) { - ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch); - ha_free(&bind_conf->ca_sign_ckch); - } -} - /* * Try to allocate the BIO and SSL session objects of <conn> connection with <bio> and * <ssl> as addresses, <bio_meth> as BIO method and <ssl_ctx> as SSL context inherited settings. @@ -6060,7 +5711,7 @@ static int ssl_sock_init(struct connection *conn, void **xprt_ctx) #ifdef SSL_READ_EARLY_DATA_SUCCESS if (bc->ssl_conf.early_data) { - b_alloc(&ctx->early_buf); + b_alloc(&ctx->early_buf, DB_MUX_RX); SSL_set_max_early_data(ctx->ssl, /* Only allow early data if we managed to allocate * a buffer. @@ -6516,19 +6167,26 @@ static int ssl_unsubscribe(struct connection *conn, void *xprt_ctx, int event_ty * It should be called with the takeover lock for the old thread held. 
* Returns 0 on success, and -1 on failure */ -static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid) +static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid, int release) { struct ssl_sock_ctx *ctx = xprt_ctx; - struct tasklet *tl = tasklet_new(); + struct tasklet *tl = NULL; - if (!tl) - return -1; + if (!release) { + tl = tasklet_new(); + if (!tl) + return -1; + } ctx->wait_event.tasklet->context = NULL; tasklet_wakeup_on(ctx->wait_event.tasklet, orig_tid); + ctx->wait_event.tasklet = tl; - ctx->wait_event.tasklet->process = ssl_sock_io_cb; - ctx->wait_event.tasklet->context = ctx; + if (!release) { + ctx->wait_event.tasklet->process = ssl_sock_io_cb; + ctx->wait_event.tasklet->context = ctx; + } + return 0; } @@ -6558,7 +6216,7 @@ static void ssl_set_used(struct connection *conn, void *xprt_ctx) if (!ctx || !ctx->wait_event.tasklet) return; - HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1); + HA_ATOMIC_AND(&ctx->wait_event.tasklet->state, ~TASK_F_USR1); if (ctx->xprt) xprt_set_used(conn, ctx->xprt, ctx->xprt_ctx); } @@ -7873,6 +7531,8 @@ static void __ssl_sock_init(void) xprt_register(XPRT_SSL, &ssl_sock); #if HA_OPENSSL_VERSION_NUMBER < 0x10100000L SSL_library_init(); +#elif HA_OPENSSL_VERSION_NUMBER >= 0x10100000L + OPENSSL_init_ssl(0, NULL); #endif #if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION)) cm = SSL_COMP_get_compression_methods(); @@ -8068,12 +7728,6 @@ void ssl_free_dh(void) { static void __ssl_sock_deinit(void) { -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) - if (ssl_ctx_lru_tree) { - lru64_destroy(ssl_ctx_lru_tree); - HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock); - } -#endif #if (HA_OPENSSL_VERSION_NUMBER < 0x10100000L) ERR_remove_state(0); diff --git a/src/stats-file.c b/src/stats-file.c new file mode 100644 index 0000000..1a77e31 --- /dev/null +++ b/src/stats-file.c @@ -0,0 +1,426 @@ +#include <haproxy/stats-file.h> + +#include <stdio.h> 
+#include <stdlib.h> +#include <string.h> + +#include <import/ebmbtree.h> +#include <import/ebsttree.h> +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/clock.h> +#include <haproxy/errors.h> +#include <haproxy/global.h> +#include <haproxy/guid.h> +#include <haproxy/intops.h> +#include <haproxy/list.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stats.h> +#include <haproxy/time.h> + +/* Dump all fields from <stats> into <out> for stats-file. */ +int stats_dump_fields_file(struct buffer *out, + const struct field *line, size_t stats_count, + struct show_stat_ctx *ctx) +{ + struct guid_node *guid; + struct listener *l; + int i; + + switch (ctx->px_st) { + case STAT_PX_ST_FE: + case STAT_PX_ST_BE: + guid = &__objt_proxy(ctx->obj1)->guid; + break; + + case STAT_PX_ST_LI: + l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); + guid = &l->guid; + break; + + case STAT_PX_ST_SV: + guid = &__objt_server(ctx->obj2)->guid; + break; + + default: + ABORT_NOW(); + return 1; + } + + /* Skip objects without GUID. */ + if (!guid->node.key) + return 1; + + chunk_appendf(out, "%s,", (char *)guid->node.key); + + for (i = 0; i < stats_count; ++i) { + /* Empty field for stats-file is used to skip its output, + * including any separator. + */ + if (field_format(line, i) == FF_EMPTY) + continue; + + if (!stats_emit_raw_data_field(out, &line[i])) + return 0; + if (!chunk_strcat(out, ",")) + return 0; + } + + chunk_strcat(out, "\n"); + return 1; +} + +void stats_dump_file_header(int type, struct buffer *out) +{ + const struct stat_col *col; + int i; + + /* Caller must specified ither FE or BE. 
*/ + BUG_ON(!(type & ((1 << STATS_TYPE_FE) | (1 << STATS_TYPE_BE)))); + + if (type & (1 << STATS_TYPE_FE)) { + chunk_strcat(out, "#fe guid,"); + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &stat_cols_px[i]; + if (stcol_is_generic(col) && + col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI)) { + chunk_appendf(out, "%s,", col->name); + } + } + } + else { + chunk_appendf(out, "#be guid,"); + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &stat_cols_px[i]; + if (stcol_is_generic(col) && + col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)) { + chunk_appendf(out, "%s,", col->name); + } + } + } + + chunk_strcat(out, "\n"); +} + +/* Parse an identified header line <header> starting with '#' character. + * + * If the section is recognized, <domain> will point to the current stats-file + * scope. <cols> will be filled as a matrix to identify each stat_col position + * using <st_tree> as prefilled proxy stats columns. If stats-file section is + * unknown, only <domain> will be set to STFILE_DOMAIN_UNSET. + * + * Returns 0 on success. On fatal error, non-zero is returned and parsing should + * be interrupted. + */ +static int parse_header_line(struct ist header, struct eb_root *st_tree, + enum stfile_domain *domain, + const struct stat_col *cols[]) +{ + enum stfile_domain dom = STFILE_DOMAIN_UNSET; + struct ist token; + char last; + int i; + + header = iststrip(header); + last = istptr(header)[istlen(header) - 1]; + token = istsplit(&header, ' '); + + /* A header line is considered valid if: + * - a space delimiter is found and first token is several chars + * - last line character must be a comma separator + */ + if (!istlen(header) || istlen(token) == 1 || last != ',') + goto err; + + if (isteq(token, ist("#fe"))) + dom = STFILE_DOMAIN_PX_FE; + else if (isteq(token, ist("#be"))) + dom = STFILE_DOMAIN_PX_BE; + + /* Remove 'guid' field. */ + token = istsplit(&header, ','); + if (!isteq(token, ist("guid"))) { + /* Fatal error if FE/BE domain without guid token. 
*/ + if (dom == STFILE_DOMAIN_PX_FE || dom == STFILE_DOMAIN_PX_BE) + goto err; + } + + /* Unknown domain. Following lines should be ignored until next header. */ + if (dom == STFILE_DOMAIN_UNSET) + return 0; + + /* Generate matrix of stats column into cols[]. */ + memset(cols, 0, sizeof(void *) * STAT_FILE_MAX_COL_COUNT); + + i = 0; + while (istlen(header) && i < STAT_FILE_MAX_COL_COUNT) { + struct stcol_node *col_node; + const struct stat_col *col; + struct ebmb_node *node; + + /* Lookup column by its name into <st_tree>. */ + token = istsplit(&header, ','); + node = ebst_lookup(st_tree, ist0(token)); + if (!node) { + ++i; + continue; + } + + col_node = ebmb_entry(node, struct stcol_node, name); + col = col_node->col; + + /* Ignore column if its cap is not valid with current stats-file section. */ + if ((dom == STFILE_DOMAIN_PX_FE && + !(col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI))) || + (dom == STFILE_DOMAIN_PX_BE && + !(col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)))) { + ++i; + continue; + } + + cols[i] = col; + ++i; + } + + *domain = dom; + return 0; + + err: + *domain = STFILE_DOMAIN_UNSET; + return 1; +} + +/* Preload an individual counter instance stored at <counter> with <token> + * value> for the <col> stat column. + * + * Returns 0 on success else non-zero if counter was not updated. + */ +static int load_ctr(const struct stat_col *col, const struct ist token, + void* counter) +{ + const enum field_nature fn = stcol_nature(col); + const enum field_format ff = stcol_format(col); + const char *ptr = istptr(token); + struct field value; + + switch (ff) { + case FF_U64: + value.u.u64 = read_uint64(&ptr, istend(token)); + break; + + case FF_S32: + case FF_U32: + value.u.u32 = read_uint(&ptr, istend(token)); + break; + + default: + /* Unsupported field nature. */ + return 1; + } + + /* Do not load value if non numeric characters present. 
*/ + if (ptr != istend(token)) + return 1; + + if (fn == FN_COUNTER && ff == FF_U64) { + *(uint64_t *)counter = value.u.u64; + } + else if (fn == FN_RATE && ff == FF_U32) { + preload_freq_ctr(counter, value.u.u32); + } + else if (fn == FN_AGE && (ff == FF_U32 || ff == FF_S32)) { + *(uint32_t *)counter = ns_to_sec(now_ns) - value.u.u32; + } + else { + /* Unsupported field format/nature combination. */ + return 1; + } + + return 0; +} + +/* Parse a non header stats-file line <line>. Specify current parsing <domain> + * and <cols> stats column matrix derived from the last header line. + * + * Returns 0 on success else non-zero. + */ +static int parse_stat_line(struct ist line, + enum stfile_domain domain, + const struct stat_col *cols[]) +{ + struct guid_node *node; + struct listener *li; + struct server *srv; + struct proxy *px; + struct ist token; + char *base_off; + char *guid; + int i, off; + + token = istsplit(&line, ','); + guid = ist0(token); + if (!guid_is_valid_fmt(guid, NULL)) + goto err; + + node = guid_lookup(guid); + if (!node) { + /* Silently ignored unknown GUID. */ + return 0; + } + + switch (obj_type(node->obj_type)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(node->obj_type); + + if (domain == STFILE_DOMAIN_PX_FE) { + if (!(px->cap & PR_CAP_FE)) + goto err; + base_off = (char *)&px->fe_counters; + off = 0; + } + else if (domain == STFILE_DOMAIN_PX_BE) { + if (!(px->cap & PR_CAP_BE)) + goto err; + base_off = (char *)&px->be_counters; + off = 1; + } + else { + goto err; + } + + break; + + case OBJ_TYPE_LISTENER: + if (domain != STFILE_DOMAIN_PX_FE) + goto err; + + li = __objt_listener(node->obj_type); + /* Listeners counters are not allocated if 'option socket-stats' unset. 
*/ + if (!li->counters) + return 0; + + base_off = (char *)li->counters; + off = 0; + break; + + case OBJ_TYPE_SERVER: + if (domain != STFILE_DOMAIN_PX_BE) + goto err; + + srv = __objt_server(node->obj_type); + base_off = (char *)&srv->counters; + off = 1; + break; + + default: + goto err; + } + + i = 0; + while (istlen(line) && i < STAT_FILE_MAX_COL_COUNT) { + const struct stat_col *col = cols[i++]; + + token = istsplit(&line, ','); + if (!istlen(token)) + continue; + + if (!col) + continue; + + load_ctr(col, token, base_off + col->metric.offset[off]); + } + + return 0; + + err: + return 1; +} + +/* Parse a stats-file and preload haproxy internal counters. */ +void apply_stats_file(void) +{ + const struct stat_col *cols[STAT_FILE_MAX_COL_COUNT]; + struct eb_root st_tree = EB_ROOT; + enum stfile_domain domain; + int valid_format = 0; + FILE *file; + struct ist istline; + char *line = NULL; + int linenum; + + if (!global.stats_file) + return; + + file = fopen(global.stats_file, "r"); + if (!file) { + ha_warning("config: Can't load stats file: cannot open file.\n"); + return; + } + + /* Generate stat columns map indexed by name. 
*/ + if (generate_stat_tree(&st_tree, stat_cols_px)) { + ha_warning("config: Can't load stats file: not enough memory.\n"); + goto out; + } + + line = malloc(sizeof(char) * LINESIZE); + if (!line) { + ha_warning("config: Can't load stats file: line alloc error.\n"); + goto out; + } + + linenum = 0; + domain = STFILE_DOMAIN_UNSET; + while (1) { + if (!fgets(line, LINESIZE, file)) + break; + + ++linenum; + istline = iststrip(ist(line)); + if (!istlen(istline)) + continue; + + if (*istptr(istline) == '#') { + if (parse_header_line(istline, &st_tree, &domain, cols)) { + if (!valid_format) { + ha_warning("config: Invalid stats-file format.\n"); + break; + } + + ha_warning("config: Ignored stats-file header line '%d'.\n", linenum); + } + + valid_format = 1; + } + else if (domain != STFILE_DOMAIN_UNSET) { + if (parse_stat_line(istline, domain, cols)) + ha_warning("config: Ignored stats-file line %d.\n", linenum); + } + else { + /* Stop parsing if first line is not a valid header. + * Allows to immediately stop reading garbage file. 
+ */ + if (!valid_format) { + ha_warning("config: Invalid stats-file format.\n"); + break; + } + } + } + + out: + while (!eb_is_empty(&st_tree)) { + struct ebmb_node *node = ebmb_first(&st_tree); + struct stcol_node *snode = ebmb_entry(node, struct stcol_node, name); + + ebmb_delete(node); + ha_free(&snode); + } + + ha_free(&line); + fclose(file); +} diff --git a/src/stats-html.c b/src/stats-html.c new file mode 100644 index 0000000..41eaa9e --- /dev/null +++ b/src/stats-html.c @@ -0,0 +1,2081 @@ +#include <haproxy/stats-html.h> + +#include <string.h> + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/applet.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/clock.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/global.h> +#include <haproxy/http.h> +#include <haproxy/http_htx.h> +#include <haproxy/htx.h> +#include <haproxy/list.h> +#include <haproxy/listener.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/pipe.h> +#include <haproxy/proxy.h> +#include <haproxy/stats.h> +#include <haproxy/stconn.h> +#include <haproxy/server.h> +#include <haproxy/task.h> +#include <haproxy/thread.h> +#include <haproxy/time.h> +#include <haproxy/tinfo.h> +#include <haproxy/tools.h> +#include <haproxy/uri_auth-t.h> +#include <haproxy/version.h> + +static const char *field_to_html_str(const struct field *f) +{ + switch (field_format(f, 0)) { + case FF_S32: return U2H(f->u.s32); + case FF_S64: return U2H(f->u.s64); + case FF_U64: return U2H(f->u.u64); + case FF_U32: return U2H(f->u.u32); + case FF_FLT: return F2H(f->u.flt); + case FF_STR: return field_str(f, 0); + case FF_EMPTY: + default: + return ""; + } +} + +/* Dumps the HTTP stats head block to chunk ctx buffer and uses the per-uri + * parameters from the parent proxy. The caller is responsible for clearing + * chunk ctx buffer if needed. 
+ */ +void stats_dump_html_head(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + struct uri_auth *uri; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + + /* WARNING! This must fit in the first buffer !!! */ + chunk_appendf(chk, + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n" + "\"http://www.w3.org/TR/html4/loose.dtd\">\n" + "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n" + "<link rel=\"icon\" href=\"data:,\">\n" + "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n" + "<style type=\"text/css\"><!--\n" + "body {" + " font-family: arial, helvetica, sans-serif;" + " font-size: 12px;" + " font-weight: normal;" + " color: black;" + " background: white;" + "}\n" + "th,td {" + " font-size: 10px;" + "}\n" + "h1 {" + " font-size: x-large;" + " margin-bottom: 0.5em;" + "}\n" + "h2 {" + " font-family: helvetica, arial;" + " font-size: x-large;" + " font-weight: bold;" + " font-style: italic;" + " color: #6020a0;" + " margin-top: 0em;" + " margin-bottom: 0em;" + "}\n" + "h3 {" + " font-family: helvetica, arial;" + " font-size: 16px;" + " font-weight: bold;" + " color: #b00040;" + " background: #e8e8d0;" + " margin-top: 0em;" + " margin-bottom: 0em;" + "}\n" + "li {" + " margin-top: 0.25em;" + " margin-right: 2em;" + "}\n" + ".hr {margin-top: 0.25em;" + " border-color: black;" + " border-bottom-style: solid;" + "}\n" + ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n" + ".total {background: #20D0D0;color: #ffff80;}\n" + ".frontend {background: #e8e8d0;}\n" + ".socket {background: #d0d0d0;}\n" + ".backend {background: #e8e8d0;}\n" + ".active_down {background: #ff9090;}\n" + ".active_going_up {background: #ffd020;}\n" + ".active_going_down {background: #ffffa0;}\n" + ".active_up {background: #c0ffc0;}\n" + ".active_nolb {background: #20a0ff;}\n" + ".active_draining {background: #20a0FF;}\n" + 
".active_no_check {background: #e0e0e0;}\n" + ".backup_down {background: #ff9090;}\n" + ".backup_going_up {background: #ff80ff;}\n" + ".backup_going_down {background: #c060ff;}\n" + ".backup_up {background: #b0d0ff;}\n" + ".backup_nolb {background: #90b0e0;}\n" + ".backup_draining {background: #cc9900;}\n" + ".backup_no_check {background: #e0e0e0;}\n" + ".maintain {background: #c07820;}\n" + ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */ + "\n" + "a.px:link {color: #ffff40; text-decoration: none;}" + "a.px:visited {color: #ffff40; text-decoration: none;}" + "a.px:hover {color: #ffffff; text-decoration: none;}" + "a.lfsb:link {color: #000000; text-decoration: none;}" + "a.lfsb:visited {color: #000000; text-decoration: none;}" + "a.lfsb:hover {color: #505050; text-decoration: none;}" + "\n" + "table.tbl { border-collapse: collapse; border-style: none;}\n" + "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n" + "table.tbl td.ac { text-align: center;}\n" + "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n" + "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 2px 3px; white-space: nowrap;}\n" + "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n" + "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n" + "\n" + "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n" + "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n" + "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n" + "table.det { border-collapse: collapse; border-style: none; 
}\n" + "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n" + "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n" + "u {text-decoration:none; border-bottom: 1px dotted black;}\n" + "div.tips {\n" + " display:block;\n" + " visibility:hidden;\n" + " z-index:2147483647;\n" + " position:absolute;\n" + " padding:2px 4px 3px;\n" + " background:#f0f060; color:#000000;\n" + " border:1px solid #7040c0;\n" + " white-space:nowrap;\n" + " font-style:normal;font-size:11px;font-weight:normal;\n" + " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n" + " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n" + "}\n" + "u:hover div.tips {visibility:visible;}\n" + "@media (prefers-color-scheme: dark) {\n" + " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n" + " h1 { color: #a265e0!important; }\n" + " h2 { color: #a265e0; }\n" + " h3 { color: #ff5190; background-color: #3e3e1f; }\n" + " a { color: #3391ff; }\n" + " input { background-color: #2f3437; }\n" + " .hr { border-color: #8c8273; }\n" + " .titre { background-color: #1aa6a6; color: #e8e6e3; }\n" + " .frontend {background: #2f3437;}\n" + " .socket {background: #2a2d2f;}\n" + " .backend {background: #2f3437;}\n" + " .active_down {background: #760000;}\n" + " .active_going_up {background: #b99200;}\n" + " .active_going_down {background: #6c6c00;}\n" + " .active_up {background: #165900;}\n" + " .active_nolb {background: #006ab9;}\n" + " .active_draining {background: #006ab9;}\n" + " .active_no_check {background: #2a2d2f;}\n" + " .backup_down {background: #760000;}\n" + " .backup_going_up {background: #7f007f;}\n" + " .backup_going_down {background: #580092;}\n" + " .backup_up 
{background: #2e3234;}\n" + " .backup_nolb {background: #1e3c6a;}\n" + " .backup_draining {background: #a37a00;}\n" + " .backup_no_check {background: #2a2d2f;}\n" + " .maintain {background: #9a601a;}\n" + " a.px:link {color: #d8d83b; text-decoration: none;}\n" + " a.px:visited {color: #d8d83b; text-decoration: none;}\n" + " a.px:hover {color: #ffffff; text-decoration: none;}\n" + " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n" + " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n" + " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n" + " table.tbl th.empty { background-color: #181a1b; }\n" + " table.tbl th.desc { background: #181a1b; }\n" + " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n" + " table.tbl th { border-color: #808080; }\n" + " table.tbl td { border-color: #808080; }\n" + " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n" + " div.tips {\n" + " background:#8e8e0d;\n" + " color:#e8e6e3;\n" + " border-color: #4e2c86;\n" + " -moz-box-shadow: #60686c 2px 2px 3px;\n" + " -webkit-box-shadow: #60686c 2px 2px 3px;\n" + " box-shadow: #60686c 2px 2px 3px;\n" + " }\n" + "}\n" + "-->\n" + "</style></head>\n", + (ctx->flags & STAT_F_SHNODE) ? " on " : "", + (ctx->flags & STAT_F_SHNODE) ? (uri && uri->node ? uri->node : global.node) : "" + ); +} + +/* Dumps the HTML stats information block to chunk ctx buffer and uses the + * state from stream connector <sc> and per-uri parameter from the parent + * proxy. The caller is responsible for clearing chunk ctx buffer if needed. 
+ */ +void stats_dump_html_info(struct stconn *sc) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + unsigned int up = ns_to_sec(now_ns - start_time_ns); + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + const char *scope_ptr = stats_scope_ptr(appctx); + struct uri_auth *uri; + unsigned long long bps; + int thr; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + for (bps = thr = 0; thr < global.nbthread; thr++) + bps += 32ULL * read_freq_ctr(&ha_thread_ctx[thr].out_32bps); + + /* Turn the bytes per second to bits per second and take care of the + * usual ethernet overhead in order to help figure how far we are from + * interface saturation since it's the only case which usually matters. + * For this we count the total size of an Ethernet frame on the wire + * including preamble and IFG (1538) for the largest TCP segment it + * transports (1448 with TCP timestamps). This is not valid for smaller + * packets (under-estimated), but it gives a reasonably accurate + * estimation of how far we are from uplink saturation. + */ + bps = bps * 8 * 1538 / 1448; + + /* WARNING! this has to fit the first packet too. + * We are around 3.5 kB, add adding entries will + * become tricky if we want to support 4kB buffers ! 
+ */ + chunk_appendf(chk, + "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">" + PRODUCT_NAME "%s</a></h1>\n" + "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n" + "<hr width=\"100%%\" class=\"hr\">\n" + "<h3>> General process information</h3>\n" + "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n" + "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n" + "<b>uptime = </b> %dd %dh%02dm%02ds; warnings = %u<br>\n" + "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n" + "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>reached = </b> %llu; <b>maxpipes = </b> %d<br>\n" + "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n" + "Running tasks: %d/%d (%d niced); idle = %d %%<br>\n" + "</td><td align=\"center\" nowrap>\n" + "<table class=\"lgd\"><tr>\n" + "<td class=\"active_up\"> </td><td class=\"noborder\">active UP </td>" + "<td class=\"backup_up\"> </td><td class=\"noborder\">backup UP </td>" + "</tr><tr>\n" + "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>" + "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>" + "</tr><tr>\n" + "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>" + "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>" + "</tr><tr>\n" + "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN </td>" + "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>" + "</tr><tr>\n" + "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) </td>" + "</tr><tr>\n" + "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance </td>" + "</tr></table>\n" + "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled." 
+ "</td>" + "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" + "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">" + "", + (ctx->flags & STAT_F_HIDEVER) ? "" : (stats_version_string), + pid, (ctx->flags & STAT_F_SHNODE) ? " on " : "", + (ctx->flags & STAT_F_SHNODE) ? (uri->node ? uri->node : global.node) : "", + (ctx->flags & STAT_F_SHDESC) ? ": " : "", + (ctx->flags & STAT_F_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "", + pid, 1, 1, global.nbthread, + up / 86400, (up % 86400) / 3600, + (up % 3600) / 60, (up % 60), + HA_ATOMIC_LOAD(&tot_warnings), + global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited", + global.rlimit_memmax ? " MB" : "", + global.rlimit_nofile, + global.maxsock, global.maxconn, HA_ATOMIC_LOAD(&maxconn_reached), global.maxpipes, + actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec), + bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0), + bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k', + total_run_queues(), total_allocated_tasks(), total_niced_running_tasks(), clock_report_idle()); + + /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + memcpy(scope_txt, scope_ptr, ctx->scope_len); + scope_txt[ctx->scope_len] = '\0'; + + chunk_appendf(chk, + "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n", + (ctx->scope_len > 0) ? 
scope_txt : "", + STAT_SCOPE_TXT_MAXLEN); + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + if (ctx->flags & STAT_F_HIDE_DOWN) + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n", + uri->uri_prefix, + "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + else + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n", + uri->uri_prefix, + ";up", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + + if (uri->refresh > 0) { + if (ctx->flags & STAT_F_NO_REFRESH) + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + "", + scope_txt); + else + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + ";norefresh", + scope_txt); + } + + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + + chunk_appendf(chk, + "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n", + uri->uri_prefix, + (uri->refresh > 0) ? ";norefresh" : "", + scope_txt); + + chunk_appendf(chk, + "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n", + uri->uri_prefix, + (uri->refresh > 0) ? 
";norefresh" : "", + scope_txt, uri->uri_prefix); + + chunk_appendf(chk, + "</ul></td>" + "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" + "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n" + "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n" + "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n" + "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n" + "</ul>" + "</td>" + "</tr></table>\n" + "" + ); + + if (ctx->st_code) { + switch (ctx->st_code) { + case STAT_STATUS_DONE: + chunk_appendf(chk, + "<p><div class=active_up>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action processed successfully." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_NONE: + chunk_appendf(chk, + "<p><div class=active_going_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Nothing has changed." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_PART: + chunk_appendf(chk, + "<p><div class=active_going_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action partially processed.<br>" + "Some server names are probably unknown or ambiguous (duplicated names in the backend)." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_ERRP: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action not processed because of invalid parameters." 
+ "<ul>" + "<li>The action is maybe unknown.</li>" + "<li>Invalid key parameter (empty or too long).</li>" + "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>" + "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>" + "</ul>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_EXCD: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Action not processed : the buffer couldn't store all the data.<br>" + "You should retry with less servers at a time.</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_DENY: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Action denied.</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_IVAL: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Invalid requests (unsupported method or chunked encoded request).</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + default: + chunk_appendf(chk, + "<p><div class=active_no_check>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Unexpected result." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? 
";norefresh" : "", + scope_txt); + } + chunk_appendf(chk, "<p>\n"); + } +} + +/* Dump all fields from <stats> into <out> using the HTML format. A column is + * reserved for the checkbox is STAT_F_ADMIN is set in <flags>. Some extra info + * are provided if STAT_F_SHLGNDS is present in <flags>. The statistics from + * extra modules are displayed at the end of the lines if STAT_F_SHMODULES is + * present in <flags>. + */ +int stats_dump_fields_html(struct buffer *out, + const struct field *stats, + struct show_stat_ctx *ctx) +{ + struct buffer src; + struct stats_module *mod; + int flags = ctx->flags; + int i = 0, j = 0; + + if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_FE) { + chunk_appendf(out, + /* name, queue */ + "<tr class=\"frontend\">"); + + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + + chunk_appendf(out, + "<td class=ac>" + "<a name=\"%s/Frontend\"></a>" + "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>" + "<td colspan=3></td>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_PXNAME)); + + chunk_appendf(out, + /* sessions rate : current */ + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Current connection rate:</th><td>%s/s</td></tr>" + "<tr><th>Current session rate:</th><td>%s/s</td></tr>" + "", + U2H(stats[ST_I_PX_RATE].u.u32), + U2H(stats[ST_I_PX_CONN_RATE].u.u32), + U2H(stats[ST_I_PX_RATE].u.u32)); + + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, + "<tr><th>Current request rate:</th><td>%s/s</td></tr>", + U2H(stats[ST_I_PX_REQ_RATE].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions rate : max */ + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Max connection rate:</th><td>%s/s</td></tr>" + "<tr><th>Max session rate:</th><td>%s/s</td></tr>" + "", + U2H(stats[ST_I_PX_RATE_MAX].u.u32), + U2H(stats[ST_I_PX_CONN_RATE_MAX].u.u32), + U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + if 
(strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, + "<tr><th>Max request rate:</th><td>%s/s</td></tr>", + U2H(stats[ST_I_PX_REQ_RATE_MAX].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions rate : limit */ + "<td>%s</td>", + LIM2A(stats[ST_I_PX_RATE_LIM].u.u32, "-")); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. connections:</th><td>%s</td></tr>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_CONN_TOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>- HTTP/1 sessions:</th><td>%s</td></tr>" + "<tr><th>- HTTP/2 sessions:</th><td>%s</td></tr>" + "<tr><th>- HTTP/3 sessions:</th><td>%s</td></tr>" + "<tr><th>- other sessions:</th><td>%s</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/1 requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/2 requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/3 requests:</th><td>%s</td></tr>" + "<tr><th>- other requests:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_H1SESS].u.u64), + U2H(stats[ST_I_PX_H2SESS].u.u64), + U2H(stats[ST_I_PX_H3SESS].u.u64), + U2H(stats[ST_I_PX_SESS_OTHER].u.u64), + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_H1REQ].u.u64), + U2H(stats[ST_I_PX_H2REQ].u.u64), + U2H(stats[ST_I_PX_H3REQ].u.u64), + U2H(stats[ST_I_PX_REQ_OTHER].u.u64)); + + chunk_appendf(out, + "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" + "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" + "<tr><th>- other responses:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), + U2H(stats[ST_I_PX_COMP_RSP].u.u64), + stats[ST_I_PX_HRSP_2XX].u.u64 ? + (int)(100 * stats[ST_I_PX_COMP_RSP].u.u64 / stats[ST_I_PX_HRSP_2XX].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64)); + + chunk_appendf(out, + "<tr><th>Intercepted requests:</th><td>%s</td></tr>" + "<tr><th>Cache lookups:</th><td>%s</td></tr>" + "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal errors:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_INTERCEPTED].u.u64), + U2H(stats[ST_I_PX_CACHE_LOOKUPS].u.u64), + U2H(stats[ST_I_PX_CACHE_HITS].u.u64), + stats[ST_I_PX_CACHE_LOOKUPS].u.u64 ? 
+ (int)(100 * stats[ST_I_PX_CACHE_HITS].u.u64 / stats[ST_I_PX_CACHE_LOOKUPS].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, lastsess */ + "<td></td><td></td>" + /* bytes : in */ + "<td>%s</td>" + "", + U2H(stats[ST_I_PX_BIN].u.u64)); + + chunk_appendf(out, + /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ + "<td>%s%s<div class=tips><table class=det>" + "<tr><th>Response bytes in:</th><td>%s</td></tr>" + "<tr><th>Compression in:</th><td>%s</td></tr>" + "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Compression bypass:</th><td>%s</td></tr>" + "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" + "</table></div>%s</td>", + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "<u>":"", + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64), + U2H(stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_COMP_IN].u.u64 ? (int)(stats[ST_I_PX_COMP_OUT].u.u64 * 100 / stats[ST_I_PX_COMP_IN].u.u64) : 0, + U2H(stats[ST_I_PX_COMP_BYP].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_BOUT].u.u64 ? (int)((stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64) * 100 / stats[ST_I_PX_BOUT].u.u64) : 0, + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? 
"</u>":""); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors : request, connect, response */ + "<td>%s</td><td></td><td></td>" + /* warnings: retries, redispatches */ + "<td></td><td></td>" + /* server status : reflect frontend status */ + "<td class=ac>%s</td>" + /* rest of server: nothing */ + "<td class=ac colspan=8></td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_EREQ].u.u64), + field_str(stats, ST_I_PX_STATUS)); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SO) { + chunk_appendf(out, "<tr class=socket>"); + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + + chunk_appendf(out, + /* frontend name, listener name */ + "<td class=ac><a name=\"%s/+%s\"></a>%s" + "<a class=lfsb href=\"#%s/+%s\">%s</a>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), + (flags & STAT_F_SHLGNDS)?"<u>":"", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), field_str(stats, ST_I_PX_SVNAME)); + + if (flags & STAT_F_SHLGNDS) { + chunk_appendf(out, "<div class=tips>"); + + if (isdigit((unsigned char)*field_str(stats, ST_I_PX_ADDR))) + chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR) == '[') + 
chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR)) + chunk_appendf(out, "%s, ", field_str(stats, ST_I_PX_ADDR)); + + chunk_appendf(out, "proto=%s, ", field_str(stats, ST_I_PX_PROTO)); + + /* id */ + chunk_appendf(out, "id: %d</div>", stats[ST_I_PX_SID].u.u32); + } + + chunk_appendf(out, + /* queue */ + "%s</td><td colspan=3></td>" + /* sessions rate: current, max, limit */ + "<td colspan=3> </td>" + /* sessions: current, max, limit, total, lbtot, lastsess */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td>%s</td><td> </td><td> </td>" + /* bytes: in, out */ + "<td>%s</td><td>%s</td>" + "", + (flags & STAT_F_SHLGNDS)?"</u>":"", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), U2H(stats[ST_I_PX_BIN].u.u64), U2H(stats[ST_I_PX_BOUT].u.u64)); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors: request, connect, response */ + "<td>%s</td><td></td><td></td>" + /* warnings: retries, redispatches */ + "<td></td><td></td>" + /* server status: reflect listener status */ + "<td class=ac>%s</td>" + /* rest of server: nothing */ + "<td class=ac colspan=8></td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_EREQ].u.u64), + field_str(stats, ST_I_PX_STATUS)); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, 
"</tr>"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SV) { + const char *style; + + /* determine the style to use depending on the server's state, + * its health and weight. There isn't a 1-to-1 mapping between + * state and styles for the cases where the server is (still) + * up. The reason is that we don't want to report nolb and + * drain with the same color. + */ + + if (strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN") == 0 || + strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN (agent)") == 0) { + style = "down"; + } + else if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN ", strlen("DOWN ")) == 0) { + style = "going_up"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "DRAIN") == 0) { + style = "draining"; + } + else if (strncmp(field_str(stats, ST_I_PX_STATUS), "NOLB ", strlen("NOLB ")) == 0) { + style = "going_down"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "NOLB") == 0) { + style = "nolb"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "no check") == 0) { + style = "no_check"; + } + else if (!stats[ST_I_PX_CHKFAIL].type || + stats[ST_I_PX_CHECK_HEALTH].u.u32 == stats[ST_I_PX_CHECK_RISE].u.u32 + stats[ST_I_PX_CHECK_FALL].u.u32 - 1) { + /* no check or max health = UP */ + if (stats[ST_I_PX_WEIGHT].u.u32) + style = "up"; + else + style = "draining"; + } + else { + style = "going_down"; + } + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "MAINT", 5) == 0) + chunk_appendf(out, "<tr class=\"maintain\">"); + else + chunk_appendf(out, + "<tr class=\"%s_%s\">", + (stats[ST_I_PX_BCK].u.u32) ? 
"backup" : "active", style); + + + if (flags & STAT_F_ADMIN) + chunk_appendf(out, + "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>", + field_str(stats, ST_I_PX_PXNAME), + field_str(stats, ST_I_PX_SVNAME)); + + chunk_appendf(out, + "<td class=ac><a name=\"%s/%s\"></a>%s" + "<a class=lfsb href=\"#%s/%s\">%s</a>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), + (flags & STAT_F_SHLGNDS) ? "<u>" : "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), field_str(stats, ST_I_PX_SVNAME)); + + if (flags & STAT_F_SHLGNDS) { + chunk_appendf(out, "<div class=tips>"); + + if (isdigit((unsigned char)*field_str(stats, ST_I_PX_ADDR))) + chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR) == '[') + chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR)) + chunk_appendf(out, "%s, ", field_str(stats, ST_I_PX_ADDR)); + + /* id */ + chunk_appendf(out, "id: %d, rid: %d", stats[ST_I_PX_SID].u.u32, stats[ST_I_PX_SRID].u.u32); + + /* cookie */ + if (stats[ST_I_PX_COOKIE].type) { + chunk_appendf(out, ", cookie: '"); + chunk_initstr(&src, field_str(stats, ST_I_PX_COOKIE)); + chunk_htmlencode(out, &src); + chunk_appendf(out, "'"); + } + + chunk_appendf(out, "</div>"); + } + + chunk_appendf(out, + /* queue : current, max, limit */ + "%s</td><td>%s</td><td>%s</td><td>%s</td>" + /* sessions rate : current, max, limit */ + "<td>%s</td><td>%s</td><td></td>" + "", + (flags & STAT_F_SHLGNDS) ? 
"</u>" : "", + U2H(stats[ST_I_PX_QCUR].u.u32), U2H(stats[ST_I_PX_QMAX].u.u32), LIM2A(stats[ST_I_PX_QLIMIT].u.u32, "-"), + U2H(stats[ST_I_PX_RATE].u.u32), U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td><u>%s<div class=tips>" + "<table class=det>" + "<tr><th>Current active connections:</th><td>%s</td></tr>" + "<tr><th>Current used connections:</th><td>%s</td></tr>" + "<tr><th>Current idle connections:</th><td>%s</td></tr>" + "<tr><th>- unsafe:</th><td>%s</td></tr>" + "<tr><th>- safe:</th><td>%s</td></tr>" + "<tr><th>Estimated need of connections:</th><td>%s</td></tr>" + "<tr><th>Active connections limit:</th><td>%s</td></tr>" + "<tr><th>Idle connections limit:</th><td>%s</td></tr>" + "</table></div></u>" + "</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), + U2H(stats[ST_I_PX_SCUR].u.u32), + U2H(stats[ST_I_PX_USED_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_SRV_ICUR].u.u32), + U2H(stats[ST_I_PX_IDLE_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_SAFE_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_NEED_CONN_EST].u.u32), + + LIM2A(stats[ST_I_PX_SLIM].u.u32, "-"), + stats[ST_I_PX_SRV_ILIM].type ? U2H(stats[ST_I_PX_SRV_ILIM].u.u32) : "-", + U2H(stats[ST_I_PX_SMAX].u.u32), LIM2A(stats[ST_I_PX_SLIM].u.u32, "-"), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>New connections:</th><td>%s</td></tr>" + "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal error:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_CONNECT].u.u64), + U2H(stats[ST_I_PX_REUSE].u.u64), + (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64) ? + (int)(100 * stats[ST_I_PX_REUSE].u.u64 / (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64)) : 0, + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_1XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_2XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_3XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_4XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_5XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_OTHER].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); + chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_QT_MAX].u.u32), U2H(stats[ST_I_PX_QTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_CT_MAX].u.u32), U2H(stats[ST_I_PX_CTIME].u.u32)); + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_RT_MAX].u.u32), U2H(stats[ST_I_PX_RTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_TT_MAX].u.u32), U2H(stats[ST_I_PX_TTIME].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, last */ + "<td>%s</td><td>%s</td>", + U2H(stats[ST_I_PX_LBTOT].u.u64), + human_time(stats[ST_I_PX_LASTSESS].u.s32, 1)); + + chunk_appendf(out, + /* bytes : in, out */ + "<td>%s</td><td>%s</td>" + /* denied: req, resp */ + "<td></td><td>%s</td>" + /* errors : request, connect */ + "<td></td><td>%s</td>" + /* errors : response */ + "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" + /* warnings: retries, redispatches */ + "<td>%lld</td><td>%lld</td>" + "", + U2H(stats[ST_I_PX_BIN].u.u64), U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_ECON].u.u64), + U2H(stats[ST_I_PX_ERESP].u.u64), + (long long)stats[ST_I_PX_CLI_ABRT].u.u64, + (long long)stats[ST_I_PX_SRV_ABRT].u.u64, + (long long)stats[ST_I_PX_WRETR].u.u64, + (long long)stats[ST_I_PX_WREDIS].u.u64); + + /* status, last change */ + chunk_appendf(out, "<td class=ac>"); + + /* FIXME!!!! + * LASTCHG should contain the last change for *this* server and must be computed + * properly above, as was done below, ie: this server if maint, otherwise ref server + * if tracking. Note that ref is either local or remote depending on tracking. 
+ */ + + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "MAINT", 5) == 0) { + chunk_appendf(out, "%s MAINT", human_time(stats[ST_I_PX_LASTCHG].u.u32, 1)); + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "no check") == 0) { + chunk_strcat(out, "<i>no check</i>"); + } + else { + chunk_appendf(out, "%s %s", human_time(stats[ST_I_PX_LASTCHG].u.u32, 1), field_str(stats, ST_I_PX_STATUS)); + if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN", 4) == 0) { + if (stats[ST_I_PX_CHECK_HEALTH].u.u32) + chunk_strcat(out, " ↑"); + } + else if (stats[ST_I_PX_CHECK_HEALTH].u.u32 < stats[ST_I_PX_CHECK_RISE].u.u32 + stats[ST_I_PX_CHECK_FALL].u.u32 - 1) + chunk_strcat(out, " ↓"); + } + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN", 4) == 0 && + stats[ST_I_PX_AGENT_STATUS].type && !stats[ST_I_PX_AGENT_HEALTH].u.u32) { + chunk_appendf(out, + "</td><td class=ac><u> %s", + field_str(stats, ST_I_PX_AGENT_STATUS)); + + if (stats[ST_I_PX_AGENT_CODE].type) + chunk_appendf(out, "/%d", stats[ST_I_PX_AGENT_CODE].u.u32); + + if (stats[ST_I_PX_AGENT_DURATION].type) + chunk_appendf(out, " in %lums", (long)stats[ST_I_PX_AGENT_DURATION].u.u64); + + chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_I_PX_AGENT_DESC)); + + if (*field_str(stats, ST_I_PX_LAST_AGT)) { + chunk_appendf(out, ": "); + chunk_initstr(&src, field_str(stats, ST_I_PX_LAST_AGT)); + chunk_htmlencode(out, &src); + } + chunk_appendf(out, "</div></u>"); + } + else if (stats[ST_I_PX_CHECK_STATUS].type) { + chunk_appendf(out, + "</td><td class=ac><u> %s", + field_str(stats, ST_I_PX_CHECK_STATUS)); + + if (stats[ST_I_PX_CHECK_CODE].type) + chunk_appendf(out, "/%d", stats[ST_I_PX_CHECK_CODE].u.u32); + + if (stats[ST_I_PX_CHECK_DURATION].type) + chunk_appendf(out, " in %lums", (long)stats[ST_I_PX_CHECK_DURATION].u.u64); + + chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_I_PX_CHECK_DESC)); + + if (*field_str(stats, ST_I_PX_LAST_CHK)) { + chunk_appendf(out, ": "); + chunk_initstr(&src, 
field_str(stats, ST_I_PX_LAST_CHK)); + chunk_htmlencode(out, &src); + } + chunk_appendf(out, "</div></u>"); + } + else + chunk_appendf(out, "</td><td>"); + + chunk_appendf(out, + /* weight / uweight */ + "</td><td class=ac>%d/%d</td>" + /* act, bck */ + "<td class=ac>%s</td><td class=ac>%s</td>" + "", + stats[ST_I_PX_WEIGHT].u.u32, stats[ST_I_PX_UWEIGHT].u.u32, + stats[ST_I_PX_BCK].u.u32 ? "-" : "Y", + stats[ST_I_PX_BCK].u.u32 ? "Y" : "-"); + + /* check failures: unique, fatal, down time */ + if (strcmp(field_str(stats, ST_I_PX_STATUS), "MAINT (resolution)") == 0) { + chunk_appendf(out, "<td class=ac colspan=3>resolution</td>"); + } + else if (stats[ST_I_PX_CHKFAIL].type) { + chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_I_PX_CHKFAIL].u.u64); + + if (stats[ST_I_PX_HANAFAIL].type) + chunk_appendf(out, "/%lld", (long long)stats[ST_I_PX_HANAFAIL].u.u64); + + chunk_appendf(out, + "<div class=tips>Failed Health Checks%s</div></u></td>" + "<td>%lld</td><td>%s</td>" + "", + stats[ST_I_PX_HANAFAIL].type ? 
"/Health Analyses" : "", + (long long)stats[ST_I_PX_CHKDOWN].u.u64, human_time(stats[ST_I_PX_DOWNTIME].u.u32, 1)); + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "MAINT") != 0 && field_format(stats, ST_I_PX_TRACKED) == FF_STR) { + /* tracking a server (hence inherited maint would appear as "MAINT (via...)" */ + chunk_appendf(out, + "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>", + field_str(stats, ST_I_PX_TRACKED), field_str(stats, ST_I_PX_TRACKED)); + } + else + chunk_appendf(out, "<td colspan=3></td>"); + + /* throttle */ + if (stats[ST_I_PX_THROTTLE].type) + chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_I_PX_THROTTLE].u.u32); + else + chunk_appendf(out, "<td class=ac>-</td>"); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>\n"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_BE) { + chunk_appendf(out, "<tr class=\"backend\">"); + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + chunk_appendf(out, + "<td class=ac>" + /* name */ + "%s<a name=\"%s/Backend\"></a>" + "<a class=lfsb href=\"#%s/Backend\">Backend</a>" + "", + (flags & STAT_F_SHLGNDS)?"<u>":"", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_PXNAME)); + + if (flags & STAT_F_SHLGNDS) { + /* balancing */ + chunk_appendf(out, "<div class=tips>balancing: %s", + field_str(stats, ST_I_PX_ALGO)); + + /* cookie */ + 
if (stats[ST_I_PX_COOKIE].type) { + chunk_appendf(out, ", cookie: '"); + chunk_initstr(&src, field_str(stats, ST_I_PX_COOKIE)); + chunk_htmlencode(out, &src); + chunk_appendf(out, "'"); + } + chunk_appendf(out, "</div>"); + } + + chunk_appendf(out, + "%s</td>" + /* queue : current, max */ + "<td>%s</td><td>%s</td><td></td>" + /* sessions rate : current, max, limit */ + "<td>%s</td><td>%s</td><td></td>" + "", + (flags & STAT_F_SHLGNDS)?"</u>":"", + U2H(stats[ST_I_PX_QCUR].u.u32), U2H(stats[ST_I_PX_QMAX].u.u32), + U2H(stats[ST_I_PX_RATE].u.u32), U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>New connections:</th><td>%s</td></tr>" + "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" + "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" + "<tr><th>- other responses:</th><td>%s</td></tr>" + "<tr><th>Cache lookups:</th><td>%s</td></tr>" + "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal errors:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_CONNECT].u.u64), + U2H(stats[ST_I_PX_REUSE].u.u64), + (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64) ? + (int)(100 * stats[ST_I_PX_REUSE].u.u64 / (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64)) : 0, + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), + U2H(stats[ST_I_PX_COMP_RSP].u.u64), + stats[ST_I_PX_HRSP_2XX].u.u64 ? + (int)(100 * stats[ST_I_PX_COMP_RSP].u.u64 / stats[ST_I_PX_HRSP_2XX].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64), + U2H(stats[ST_I_PX_CACHE_LOOKUPS].u.u64), + U2H(stats[ST_I_PX_CACHE_HITS].u.u64), + stats[ST_I_PX_CACHE_LOOKUPS].u.u64 ? + (int)(100 * stats[ST_I_PX_CACHE_HITS].u.u64 / stats[ST_I_PX_CACHE_LOOKUPS].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); + chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_QT_MAX].u.u32), U2H(stats[ST_I_PX_QTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_CT_MAX].u.u32), U2H(stats[ST_I_PX_CTIME].u.u32)); + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_RT_MAX].u.u32), U2H(stats[ST_I_PX_RTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_TT_MAX].u.u32), U2H(stats[ST_I_PX_TTIME].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, last */ + "<td>%s</td><td>%s</td>" + /* bytes: in */ + "<td>%s</td>" + "", + U2H(stats[ST_I_PX_LBTOT].u.u64), + human_time(stats[ST_I_PX_LASTSESS].u.s32, 1), + U2H(stats[ST_I_PX_BIN].u.u64)); + + chunk_appendf(out, + /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ + "<td>%s%s<div class=tips><table class=det>" + "<tr><th>Response bytes in:</th><td>%s</td></tr>" + "<tr><th>Compression in:</th><td>%s</td></tr>" + "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Compression bypass:</th><td>%s</td></tr>" + "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" + "</table></div>%s</td>", + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "<u>":"", + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64), + U2H(stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_COMP_IN].u.u64 ? (int)(stats[ST_I_PX_COMP_OUT].u.u64 * 100 / stats[ST_I_PX_COMP_IN].u.u64) : 0, + U2H(stats[ST_I_PX_COMP_BYP].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_BOUT].u.u64 ? 
(int)((stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64) * 100 / stats[ST_I_PX_BOUT].u.u64) : 0, + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "</u>":""); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors : request, connect */ + "<td></td><td>%s</td>" + /* errors : response */ + "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" + /* warnings: retries, redispatches */ + "<td>%lld</td><td>%lld</td>" + /* backend status: reflect backend status (up/down): we display UP + * if the backend has known working servers or if it has no server at + * all (eg: for stats). Then we display the total weight, number of + * active and backups. */ + "<td class=ac>%s %s</td><td class=ac> </td><td class=ac>%d/%d</td>" + "<td class=ac>%d</td><td class=ac>%d</td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_ECON].u.u64), + U2H(stats[ST_I_PX_ERESP].u.u64), + (long long)stats[ST_I_PX_CLI_ABRT].u.u64, + (long long)stats[ST_I_PX_SRV_ABRT].u.u64, + (long long)stats[ST_I_PX_WRETR].u.u64, (long long)stats[ST_I_PX_WREDIS].u.u64, + human_time(stats[ST_I_PX_LASTCHG].u.u32, 1), + strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN") ? field_str(stats, ST_I_PX_STATUS) : "<font color=\"red\"><b>DOWN</b></font>", + stats[ST_I_PX_WEIGHT].u.u32, stats[ST_I_PX_UWEIGHT].u.u32, + stats[ST_I_PX_ACT].u.u32, stats[ST_I_PX_BCK].u.u32); + + chunk_appendf(out, + /* rest of backend: nothing, down transitions, total downtime, throttle */ + "<td class=ac> </td><td>%d</td>" + "<td>%s</td>" + "<td></td>", + stats[ST_I_PX_CHKDOWN].u.u32, + stats[ST_I_PX_DOWNTIME].type ? 
human_time(stats[ST_I_PX_DOWNTIME].u.u32, 1) : " "); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>"); + } + + return 1; +} + +/* Dumps the HTML table header for proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. + */ +void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + struct stats_module *mod; + int stats_module_len = 0; + + if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_F_ADMIN)) { + /* A form to enable/disable this proxy servers */ + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + chunk_appendf(chk, + "<form method=\"post\">"); + } + + /* print a new table */ + chunk_appendf(chk, + "<table class=\"tbl\" width=\"100%%\">\n" + "<tr class=\"titre\">" + "<th class=\"pxname\" width=\"10%%\">"); + + chunk_appendf(chk, + "<a 
name=\"%s\"></a>%s" + "<a class=px href=\"#%s\">%s</a>", + px->id, + (ctx->flags & STAT_F_SHLGNDS) ? "<u>":"", + px->id, px->id); + + if (ctx->flags & STAT_F_SHLGNDS) { + /* cap, mode, id */ + chunk_appendf(chk, "<div class=tips>cap: %s, mode: %s, id: %d", + proxy_cap_str(px->cap), proxy_mode_str(px->mode), + px->uuid); + chunk_appendf(chk, "</div>"); + } + + chunk_appendf(chk, + "%s</th>" + "<th class=\"%s\" width=\"90%%\">%s</th>" + "</tr>\n" + "</table>\n" + "<table class=\"tbl\" width=\"100%%\">\n" + "<tr class=\"titre\">", + (ctx->flags & STAT_F_SHLGNDS) ? "</u>":"", + px->desc ? "desc" : "empty", px->desc ? px->desc : ""); + + if (ctx->flags & STAT_F_ADMIN) { + /* Column heading for Enable or Disable server */ + if ((px->cap & PR_CAP_BE) && px->srv) + chunk_appendf(chk, + "<th rowspan=2 width=1><input type=\"checkbox\" " + "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) " + "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>", + px->id, + px->id); + else + chunk_appendf(chk, "<th rowspan=2></th>"); + } + + chunk_appendf(chk, + "<th rowspan=2></th>" + "<th colspan=3>Queue</th>" + "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>" + "<th colspan=2>Bytes</th><th colspan=2>Denied</th>" + "<th colspan=3>Errors</th><th colspan=2>Warnings</th>" + "<th colspan=9>Server</th>"); + + if (ctx->flags & STAT_F_SHMODULES) { + // calculate the count of module for colspan attribute + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + ++stats_module_len; + } + chunk_appendf(chk, "<th colspan=%d>Extra modules</th>", + stats_module_len); + } + + chunk_appendf(chk, + "</tr>\n" + "<tr class=\"titre\">" + "<th>Cur</th><th>Max</th><th>Limit</th>" + "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>" + "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>" + "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>" + "<th>Resp</th><th>Retr</th><th>Redis</th>" + 
"<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>" + "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>" + "<th>Thrtle</th>\n"); + + if (ctx->flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(chk, "<th>%s</th>", mod->name); + } + } + + chunk_appendf(chk, "</tr>"); +} + +/* Dumps the HTML table trailer for proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. + */ +void stats_dump_html_px_end(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + + chunk_appendf(chk, "</table>"); + + if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_F_ADMIN)) { + /* close the form used to enable/disable this proxy servers */ + chunk_appendf(chk, + "Choose the action to perform on the checked servers : " + "<select name=action>" + "<option value=\"\"></option>" + "<option value=\"ready\">Set state to READY</option>" + "<option value=\"drain\">Set state to DRAIN</option>" + "<option value=\"maint\">Set state to MAINT</option>" + "<option value=\"dhlth\">Health: disable checks</option>" + "<option value=\"ehlth\">Health: enable checks</option>" + "<option value=\"hrunn\">Health: force UP</option>" + "<option value=\"hnolb\">Health: force NOLB</option>" + "<option value=\"hdown\">Health: force DOWN</option>" + "<option value=\"dagent\">Agent: disable checks</option>" + "<option value=\"eagent\">Agent: enable checks</option>" + "<option value=\"arunn\">Agent: force UP</option>" + "<option value=\"adown\">Agent: force DOWN</option>" + "<option value=\"shutdown\">Kill Sessions</option>" + "</select>" + "<input type=\"hidden\" name=\"b\" value=\"#%d\">" + " <input type=\"submit\" value=\"Apply\">" + "</form>", + px->uuid); + } + + chunk_appendf(chk, "<p>\n"); +} + +/* Dumps the HTML stats trailer 
block to <out> buffer. The caller is + * responsible for clearing it if needed. + */ +void stats_dump_html_end(struct buffer *out) +{ + chunk_appendf(out, "</body></html>\n"); +} + + +static int stats_send_http_headers(struct stconn *sc, struct htx *htx) +{ + struct uri_auth *uri; + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct htx_sl *sl; + unsigned int flags; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + + flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK); + sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK")); + if (!sl) + goto full; + sl->info.res.status = 200; + + if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache"))) + goto full; + if (ctx->flags & STAT_F_FMT_HTML) { + if (!htx_add_header(htx, ist("Content-Type"), ist("text/html"))) + goto full; + } + else if (ctx->flags & (STAT_F_FMT_JSON|STAT_F_JSON_SCHM)) { + if (!htx_add_header(htx, ist("Content-Type"), ist("application/json"))) + goto full; + } + else { + if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain"))) + goto full; + } + + if (uri->refresh > 0 && !(ctx->flags & STAT_F_NO_REFRESH)) { + const char *refresh = U2A(uri->refresh); + if (!htx_add_header(htx, ist("Refresh"), ist(refresh))) + goto full; + } + + if (ctx->flags & STAT_F_CHUNKED) { + if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked"))) + goto full; + } + + if (!htx_add_endof(htx, HTX_BLK_EOH)) + goto full; + return 1; + + full: + htx_reset(htx); + applet_set_eos(appctx); + applet_set_error(appctx); + return 0; +} + +static int stats_send_http_redirect(struct stconn *sc, struct htx *htx) +{ + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + struct uri_auth *uri; + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct htx_sl *sl; + unsigned int flags; + + BUG_ON(!ctx->http_px); + uri = 
ctx->http_px->uri_auth; + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + /* We don't want to land on the posted stats page because a refresh will + * repost the data. We don't want this to happen on accident so we redirect + * the browse to the stats page with a GET. + */ + chunk_printf(&trash, "%s;st=%s%s%s%s", + uri->uri_prefix, + ((ctx->st_code > STAT_STATUS_INIT) && + (ctx->st_code < STAT_STATUS_SIZE) && + stat_status_codes[ctx->st_code]) ? + stat_status_codes[ctx->st_code] : + stat_status_codes[STAT_STATUS_UNKN], + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + + flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_BODYLESS); + sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other")); + if (!sl) + goto full; + sl->info.res.status = 303; + + if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || + !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) || + !htx_add_header(htx, ist("Content-Length"), ist("0")) || + !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data))) + goto full; + + if (!htx_add_endof(htx, HTX_BLK_EOH)) + goto full; + + return 1; + + full: + htx_reset(htx); + applet_set_eos(appctx); + applet_set_error(appctx); + return 0; +} + +/* We reached the stats page through a POST request. The appctx is + * expected to have already been allocated by the caller. + * Parse the posted data and enable/disable servers if necessary. + * Returns 1 if request was parsed or zero if it needs more data. 
/* We reached the stats page through a POST request. The appctx is
 * expected to have already been allocated by the caller.
 * Parse the posted data and enable/disable servers if necessary.
 * Returns 1 if request was parsed or zero if it needs more data.
 *
 * The outcome is reported through ctx->st_code (one of the STAT_STATUS_*
 * values): EXCD when the body does not fit, ERRP on a malformed parameter, an
 * unknown backend or action, or when no listed server was altered, NONE when
 * no server was listed, DONE when all listed servers were altered and PART
 * when only some of them were.
 */
static int stats_process_http_post(struct stconn *sc)
{
	struct appctx *appctx = __sc_appctx(sc);
	struct show_stat_ctx *ctx = appctx->svcctx;

	struct proxy *px = NULL;
	struct server *sv = NULL;

	char key[LINESIZE];
	int action = ST_ADM_ACTION_NONE;
	int reprocess = 0;

	int total_servers = 0;
	int altered_servers = 0;

	char *first_param, *cur_param, *next_param, *end_params;
	char *st_cur_param = NULL;
	char *st_next_param = NULL;

	struct buffer *temp = get_trash_chunk();

	struct htx *htx = htxbuf(&appctx->inbuf);
	struct htx_blk *blk;

	/* we need more data */
	if (!(htx->flags & HTX_FL_EOM)) {
		/* check if we can receive more */
		if (applet_fl_test(appctx, APPCTX_FL_INBLK_FULL)) {
			ctx->st_code = STAT_STATUS_EXCD;
			goto out;
		}
		goto wait;
	}

	/* The request was fully received. Copy data */
	blk = htx_get_head_blk(htx);
	while (blk) {
		enum htx_blk_type type = htx_get_blk_type(blk);

		/* stop at the trailers or at the end of the message */
		if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
			break;
		if (type == HTX_BLK_DATA) {
			struct ist v = htx_get_blk_value(htx, blk);

			if (!chunk_memcat(temp, v.ptr, v.len)) {
				ctx->st_code = STAT_STATUS_EXCD;
				goto out;
			}
		}
		blk = htx_get_next_blk(htx, blk);
	}

	first_param = temp->area;
	end_params  = temp->area + temp->data;
	cur_param = next_param = end_params;
	/* NOTE(review): this writes at area[data]; presumably the trash chunk
	 * always keeps at least one spare byte past the copied body - confirm.
	 */
	*end_params = '\0';

	ctx->st_code = STAT_STATUS_NONE;

	/*
	 * Parse the parameters in reverse order to only store the last value.
	 * From the html form, the backend and the action are at the end.
	 */
	while (cur_param > first_param) {
		char *value;
		int poffset, plen;

		cur_param--;

		if ((*cur_param == '&') || (cur_param == first_param)) {
		reprocess_servers:
			/* Parse the key */
			/* skip the leading '&' except for the very first parameter */
			poffset = (cur_param != first_param ? 1 : 0);
			plen = next_param - cur_param + (cur_param == first_param ? 1 : 0);
			if ((plen > 0) && (plen <= sizeof(key))) {
				strncpy(key, cur_param + poffset, plen);
				key[plen - 1] = '\0';
			} else {
				ctx->st_code = STAT_STATUS_ERRP;
				goto out;
			}

			/* Parse the value */
			value = key;
			while (*value != '\0' && *value != '=') {
				value++;
			}
			if (*value == '=') {
				/* Ok, a value is found, we can mark the end of the key */
				*value++ = '\0';
			}
			/* a decoding failure stops the scan; what was processed so far is kept */
			if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0)
				break;

			/* Now we can check the key to see what to do */
			if (!px && (strcmp(key, "b") == 0)) {
				if ((px = proxy_be_by_name(value)) == NULL) {
					/* the backend name is unknown or ambiguous (duplicate names) */
					ctx->st_code = STAT_STATUS_ERRP;
					goto out;
				}
			}
			else if (!action && (strcmp(key, "action") == 0)) {
				if (strcmp(value, "ready") == 0) {
					action = ST_ADM_ACTION_READY;
				}
				else if (strcmp(value, "drain") == 0) {
					action = ST_ADM_ACTION_DRAIN;
				}
				else if (strcmp(value, "maint") == 0) {
					action = ST_ADM_ACTION_MAINT;
				}
				else if (strcmp(value, "shutdown") == 0) {
					action = ST_ADM_ACTION_SHUTDOWN;
				}
				else if (strcmp(value, "dhlth") == 0) {
					action = ST_ADM_ACTION_DHLTH;
				}
				else if (strcmp(value, "ehlth") == 0) {
					action = ST_ADM_ACTION_EHLTH;
				}
				else if (strcmp(value, "hrunn") == 0) {
					action = ST_ADM_ACTION_HRUNN;
				}
				else if (strcmp(value, "hnolb") == 0) {
					action = ST_ADM_ACTION_HNOLB;
				}
				else if (strcmp(value, "hdown") == 0) {
					action = ST_ADM_ACTION_HDOWN;
				}
				else if (strcmp(value, "dagent") == 0) {
					action = ST_ADM_ACTION_DAGENT;
				}
				else if (strcmp(value, "eagent") == 0) {
					action = ST_ADM_ACTION_EAGENT;
				}
				else if (strcmp(value, "arunn") == 0) {
					action = ST_ADM_ACTION_ARUNN;
				}
				else if (strcmp(value, "adown") == 0) {
					action = ST_ADM_ACTION_ADOWN;
				}
				/* else these are the old supported methods */
				else if (strcmp(value, "disable") == 0) {
					action = ST_ADM_ACTION_DISABLE;
				}
				else if (strcmp(value, "enable") == 0) {
					action = ST_ADM_ACTION_ENABLE;
				}
				else if (strcmp(value, "stop") == 0) {
					action = ST_ADM_ACTION_STOP;
				}
				else if (strcmp(value, "start") == 0) {
					action = ST_ADM_ACTION_START;
				}
				else {
					ctx->st_code = STAT_STATUS_ERRP;
					goto out;
				}
			}
			else if (strcmp(key, "s") == 0) {
				if (!(px && action)) {
					/*
					 * Indicates that we'll need to reprocess the parameters
					 * as soon as backend and action are known
					 */
					if (!reprocess) {
						st_cur_param  = cur_param;
						st_next_param = next_param;
					}
					reprocess = 1;
				}
				else if ((sv = findserver(px, value)) != NULL) {
					/* the server is modified under its own lock; each case
					 * counts the server in both totals only when the action
					 * actually applies to it.
					 */
					HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
					switch (action) {
					case ST_ADM_ACTION_DISABLE:
						if (!(sv->cur_admin & SRV_ADMF_FMAINT)) {
							altered_servers++;
							total_servers++;
							srv_set_admin_flag(sv, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_STATS_DISABLE);
						}
						break;
					case ST_ADM_ACTION_ENABLE:
						if (sv->cur_admin & SRV_ADMF_FMAINT) {
							altered_servers++;
							total_servers++;
							srv_clr_admin_flag(sv, SRV_ADMF_FMAINT);
						}
						break;
					case ST_ADM_ACTION_STOP:
						if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) {
							srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_STATS_STOP);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_START:
						if (sv->cur_admin & SRV_ADMF_FDRAIN) {
							srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_DHLTH:
						if (sv->check.state & CHK_ST_CONFIGURED) {
							sv->check.state &= ~CHK_ST_ENABLED;
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_EHLTH:
						if (sv->check.state & CHK_ST_CONFIGURED) {
							sv->check.state |= CHK_ST_ENABLED;
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_HRUNN:
						if (!(sv->track)) {
							sv->check.health = sv->check.rise + sv->check.fall - 1;
							srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_HNOLB:
						if (!(sv->track)) {
							sv->check.health = sv->check.rise + sv->check.fall - 1;
							srv_set_stopping(sv, SRV_OP_STCHGC_STATS_WEB);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_HDOWN:
						if (!(sv->track)) {
							sv->check.health = 0;
							srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_DAGENT:
						if (sv->agent.state & CHK_ST_CONFIGURED) {
							sv->agent.state &= ~CHK_ST_ENABLED;
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_EAGENT:
						if (sv->agent.state & CHK_ST_CONFIGURED) {
							sv->agent.state |= CHK_ST_ENABLED;
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_ARUNN:
						if (sv->agent.state & CHK_ST_ENABLED) {
							sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
							srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_ADOWN:
						if (sv->agent.state & CHK_ST_ENABLED) {
							sv->agent.health = 0;
							srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB);
							altered_servers++;
							total_servers++;
						}
						break;
					case ST_ADM_ACTION_READY:
						srv_adm_set_ready(sv);
						altered_servers++;
						total_servers++;
						break;
					case ST_ADM_ACTION_DRAIN:
						srv_adm_set_drain(sv);
						altered_servers++;
						total_servers++;
						break;
					case ST_ADM_ACTION_MAINT:
						srv_adm_set_maint(sv);
						altered_servers++;
						total_servers++;
						break;
					case ST_ADM_ACTION_SHUTDOWN:
						if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
							srv_shutdown_streams(sv, SF_ERR_KILLED);
							altered_servers++;
							total_servers++;
						}
						break;
					}
					HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
				} else {
					/* the server name is unknown or ambiguous (duplicate names) */
					total_servers++;
				}
			}
			if (reprocess && px && action) {
				/* Now, we know the backend and the action chosen by the user.
				 * We can safely restart from the first server parameter
				 * to reprocess them
				 */
				cur_param  = st_cur_param;
				next_param = st_next_param;
				reprocess = 0;
				goto reprocess_servers;
			}

			next_param = cur_param;
		}
	}

	/* summarize the outcome from the two counters */
	if (total_servers == 0) {
		ctx->st_code = STAT_STATUS_NONE;
	}
	else if (altered_servers == 0) {
		ctx->st_code = STAT_STATUS_ERRP;
	}
	else if (altered_servers == total_servers) {
		ctx->st_code = STAT_STATUS_DONE;
	}
	else {
		ctx->st_code = STAT_STATUS_PART;
	}
 out:
	return 1;
 wait:
	ctx->st_code = STAT_STATUS_NONE;
	return 0;
}

/* This I/O handler runs as an applet embedded in a stream connector. It is
 * used to send HTTP stats over a TCP socket. The mechanism is very simple.
 * appctx->st0 contains the operation in progress (dump, done). The handler
 * automatically unregisters itself once transfer is complete.
 *
 * The states handled below are STAT_HTTP_HEAD, STAT_HTTP_DUMP,
 * STAT_HTTP_POST, STAT_HTTP_LAST, STAT_HTTP_DONE and STAT_HTTP_END; several
 * of them may be crossed during a single call.
 */
static void http_stats_io_handler(struct appctx *appctx)
{
	struct show_stat_ctx *ctx = appctx->svcctx;
	struct stconn *sc = appctx_sc(appctx);
	struct htx  *res_htx = NULL;

	/* only proxy stats are available via http */
	ctx->domain = STATS_DOMAIN_PROXY;

	/* nothing can be done while a buffer is awaited or the output is full */
	if (applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC|APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL))
		goto out;

	/* when fast-forwarding is active, data are produced by http_stats_fastfwd() */
	if (applet_fl_test(appctx, APPCTX_FL_FASTFWD) && se_fl_test(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD))
		goto out;

	if (!appctx_get_buf(appctx, &appctx->outbuf)) {
		goto out;
	}

	res_htx = htx_from_buf(&appctx->outbuf);

	if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) {
		appctx->st0 = STAT_HTTP_END;
		goto out;
	}

	/* all states are processed in sequence */
	if (appctx->st0 == STAT_HTTP_HEAD) {
		if (stats_send_http_headers(sc, res_htx)) {
			struct ist meth = htx_sl_req_meth(http_get_stline(htxbuf(&appctx->inbuf)));

			/* a HEAD request gets the headers only */
			if (find_http_meth(istptr(meth), istlen(meth)) == HTTP_METH_HEAD)
				appctx->st0 = STAT_HTTP_DONE;
			else {
				if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_APPLET))
					se_fl_set(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD);
				appctx->st0 = STAT_HTTP_DUMP;
			}
		}
	}

	if (appctx->st0 == STAT_HTTP_DUMP) {
		ctx->chunk = b_make(trash.area, appctx->outbuf.size, 0, 0);
		/* adjust buffer size to take htx overhead into account,
		 * make sure to perform this call on an empty buffer
		 */
		ctx->chunk.size = buf_room_for_htx_data(&ctx->chunk);
		if (stats_dump_stat_to_buffer(sc, NULL, res_htx))
			appctx->st0 = STAT_HTTP_DONE;
	}

	if (appctx->st0 == STAT_HTTP_POST) {
		if (stats_process_http_post(sc))
			appctx->st0 = STAT_HTTP_LAST;
	}

	if (appctx->st0 == STAT_HTTP_LAST) {
		if (stats_send_http_redirect(sc, res_htx))
			appctx->st0 = STAT_HTTP_DONE;
	}

	if (appctx->st0 == STAT_HTTP_DONE) {
		/* no more data are expected. If the response buffer is empty,
		 * be sure to add something (EOT block in this case) to have
		 * something to send. It is important to be sure the EOM flags
		 * will be handled by the endpoint.
		 */
		if (htx_is_empty(res_htx)) {
			if (!htx_add_endof(res_htx, HTX_BLK_EOT)) {
				applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL);
				goto out;
			}
		}
		res_htx->flags |= HTX_FL_EOM;
		applet_set_eoi(appctx);
		se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD);
		applet_fl_clr(appctx, APPCTX_FL_FASTFWD);
		appctx->st0 = STAT_HTTP_END;
	}

	if (appctx->st0 == STAT_HTTP_END) {
		applet_set_eos(appctx);
		applet_will_consume(appctx);
	}

 out:
	/* we have left the request in the buffer for the case where we
	 * process a POST, and this automatically re-enables activity on
	 * read. It's better to indicate that we want to stop reading when
	 * we're sending, so that we know there's at most one direction
	 * deciding to wake the applet up. It saves it from looping when
	 * emitting large blocks into small TCP windows.
	 */
	if (res_htx)
		htx_to_buf(res_htx, &appctx->outbuf);

	if (appctx->st0 == STAT_HTTP_END) {
		/* eat the whole request */
		b_reset(&appctx->inbuf);
		applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL);
		appctx->sedesc->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED;
	}
	else if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_FULL))
		applet_wont_consume(appctx);
}

/* Fast-forward producer of the stats applet: dumps stats straight into <buf>
 * and returns the number of bytes that were added to it. Once the dump is
 * complete, fast-forwarding is disabled and the applet moves to
 * STAT_HTTP_DONE. <count> and <flags> are not used here.
 */
static size_t http_stats_fastfwd(struct appctx *appctx, struct buffer *buf,
				 size_t count, unsigned int flags)
{
	struct stconn *sc = appctx_sc(appctx);
	size_t ret = 0;

	/* remember how much data was already there */
	ret = b_data(buf);
	if (stats_dump_stat_to_buffer(sc, buf, NULL)) {
		se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD);
		applet_fl_clr(appctx, APPCTX_FL_FASTFWD);
		appctx->st0 = STAT_HTTP_DONE;
	}

	ret = b_data(buf) - ret;
	return ret;
}

/* Release callback of the stats applet: drops the server referenced by
 * ctx->obj2 when the dump was interrupted in the STAT_PX_ST_SV state.
 */
static void http_stats_release(struct appctx *appctx)
{
	struct show_stat_ctx *ctx = appctx->svcctx;

	if (ctx->px_st == STAT_PX_ST_SV)
		srv_drop(ctx->obj2);
}

/* Descriptor of the HTTP stats applet, wiring the handlers defined above */
struct applet http_stats_applet = {
	.obj_type = OBJ_TYPE_APPLET,
	.name = "<STATS>", /* used for logging */
	.fct = http_stats_io_handler,
	.rcv_buf = appctx_htx_rcv_buf,
	.snd_buf = appctx_htx_snd_buf,
	.fastfwd = http_stats_fastfwd,
	.release = http_stats_release,
};
diff --git a/src/stats-json.c b/src/stats-json.c
new file mode 100644
index 0000000..b493853
--- /dev/null
+++ b/src/stats-json.c
@@ -0,0 +1,533 @@
#include <haproxy/stats-json.h>

#include <stdio.h>

#include <haproxy/applet.h>
#include <haproxy/buf.h>
#include <haproxy/chunk.h>
#include <haproxy/stats.h>
+ */ +static int stats_emit_json_field_tags(struct buffer *out, const struct field *f) +{ + const char *origin, *nature, *scope; + int old_len; + + switch (field_origin(f, 0)) { + case FO_METRIC: origin = "Metric"; break; + case FO_STATUS: origin = "Status"; break; + case FO_KEY: origin = "Key"; break; + case FO_CONFIG: origin = "Config"; break; + case FO_PRODUCT: origin = "Product"; break; + default: origin = "Unknown"; break; + } + + switch (field_nature(f, 0)) { + case FN_GAUGE: nature = "Gauge"; break; + case FN_LIMIT: nature = "Limit"; break; + case FN_MIN: nature = "Min"; break; + case FN_MAX: nature = "Max"; break; + case FN_RATE: nature = "Rate"; break; + case FN_COUNTER: nature = "Counter"; break; + case FN_DURATION: nature = "Duration"; break; + case FN_AGE: nature = "Age"; break; + case FN_TIME: nature = "Time"; break; + case FN_NAME: nature = "Name"; break; + case FN_OUTPUT: nature = "Output"; break; + case FN_AVG: nature = "Avg"; break; + default: nature = "Unknown"; break; + } + + switch (field_scope(f, 0)) { + case FS_PROCESS: scope = "Process"; break; + case FS_SERVICE: scope = "Service"; break; + case FS_SYSTEM: scope = "System"; break; + case FS_CLUSTER: scope = "Cluster"; break; + default: scope = "Unknown"; break; + } + + old_len = out->data; + chunk_appendf(out, "\"tags\":{" + "\"origin\":\"%s\"," + "\"nature\":\"%s\"," + "\"scope\":\"%s\"" + "}", origin, nature, scope); + return !(old_len == out->data); +} + +/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per + * the recommendation for interoperable integers in section 6 of RFC 7159. + */ +#define JSON_INT_MAX ((1ULL << 53) - 1) +#define JSON_INT_MIN (0 - JSON_INT_MAX) + +/* Emits a stats field value and its type in JSON. + * Returns non-zero on success, 0 on error. 
+ */ +static int stats_emit_json_data_field(struct buffer *out, const struct field *f) +{ + int old_len; + char buf[20]; + const char *type, *value = buf, *quote = ""; + + switch (field_format(f, 0)) { + case FF_EMPTY: return 1; + case FF_S32: type = "\"s32\""; + snprintf(buf, sizeof(buf), "%d", f->u.s32); + break; + case FF_U32: type = "\"u32\""; + snprintf(buf, sizeof(buf), "%u", f->u.u32); + break; + case FF_S64: type = "\"s64\""; + if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX) + return 0; + type = "\"s64\""; + snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64); + break; + case FF_U64: if (f->u.u64 > JSON_INT_MAX) + return 0; + type = "\"u64\""; + snprintf(buf, sizeof(buf), "%llu", + (unsigned long long) f->u.u64); + break; + case FF_FLT: type = "\"flt\""; + flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt)); + break; + case FF_STR: type = "\"str\""; + value = field_str(f, 0); + quote = "\""; + break; + default: snprintf(buf, sizeof(buf), "%u", f->type); + type = buf; + value = "unknown"; + quote = "\""; + break; + } + + old_len = out->data; + chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}", + type, quote, value, quote); + return !(old_len == out->data); +} + +static void stats_print_proxy_field_json(struct buffer *out, + const struct field *stat, + const char *name, + int pos, + uint32_t field_type, + uint32_t iid, + uint32_t sid, + uint32_t pid) +{ + const char *obj_type; + switch (field_type) { + case STATS_TYPE_FE: obj_type = "Frontend"; break; + case STATS_TYPE_BE: obj_type = "Backend"; break; + case STATS_TYPE_SO: obj_type = "Listener"; break; + case STATS_TYPE_SV: obj_type = "Server"; break; + default: obj_type = "Unknown"; break; + } + + chunk_appendf(out, + "{" + "\"objType\":\"%s\"," + "\"proxyId\":%u," + "\"id\":%u," + "\"field\":{\"pos\":%d,\"name\":\"%s\"}," + "\"processNum\":%u,", + obj_type, iid, sid, pos, name, pid); +} + +static void stats_print_rslv_field_json(struct buffer *out, + const struct field 
*stat, + const char *name, + int pos) +{ + chunk_appendf(out, + "{" + "\"field\":{\"pos\":%d,\"name\":\"%s\"},", + pos, name); +} + + +/* Dumps the stats JSON header to <out> buffer. The caller is responsible for + * clearing it if needed. + */ +void stats_dump_json_header(struct buffer *out) +{ + chunk_strcat(out, "["); +} + +/* Dump all fields from <line> into <out> using a typed "field:desc:type:value" format */ +int stats_dump_fields_json(struct buffer *out, + const struct field *line, size_t stats_count, + struct show_stat_ctx *ctx) +{ + int flags = ctx->flags; + int domain = ctx->domain; + int started = (ctx->field) ? 1 : 0; + int ready_data = 0; + + if (!started && (flags & STAT_F_STARTED) && !chunk_strcat(out, ",")) + return 0; + if (!started && !chunk_strcat(out, "[")) + return 0; + + for (; ctx->field < stats_count; ctx->field++) { + int old_len; + int i = ctx->field; + + if (!line[i].type) + continue; + + if (started && !chunk_strcat(out, ",")) + goto err; + started = 1; + + old_len = out->data; + if (domain == STATS_DOMAIN_PROXY) { + stats_print_proxy_field_json(out, &line[i], + stat_cols[domain][i].name, + i, + line[ST_I_PX_TYPE].u.u32, + line[ST_I_PX_IID].u.u32, + line[ST_I_PX_SID].u.u32, + line[ST_I_PX_PID].u.u32); + } else if (domain == STATS_DOMAIN_RESOLVERS) { + stats_print_rslv_field_json(out, &line[i], + stat_cols[domain][i].name, + i); + } + + if (old_len == out->data) + goto err; + + if (!stats_emit_json_field_tags(out, &line[i])) + goto err; + + if (!stats_emit_json_data_field(out, &line[i])) + goto err; + + if (!chunk_strcat(out, "}")) + goto err; + ready_data = out->data; + } + + if (!chunk_strcat(out, "]")) + goto err; + + ctx->field = 0; /* we're done */ + return 1; + +err: + if (!ready_data) { + /* not enough buffer space for a single entry.. 
*/ + chunk_reset(out); + if (ctx->flags & STAT_F_STARTED) + chunk_strcat(out, ","); + chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}"); + return 0; /* hard error */ + } + /* push ready data and wait for a new buffer to complete the dump */ + out->data = ready_data; + return 1; +} + +/* Dumps the JSON stats trailer block to <out> buffer. The caller is + * responsible for clearing it if needed. + */ +void stats_dump_json_end(struct buffer *out) +{ + chunk_strcat(out, "]\n"); +} + +/* Dump all fields from <stats> into <out> using the "show info json" format */ +int stats_dump_json_info_fields(struct buffer *out, + const struct field *info, + struct show_stat_ctx *ctx) +{ + int started = (ctx->field) ? 1 : 0; + int ready_data = 0; + + if (!started && !chunk_strcat(out, "[")) + return 0; + + for (; ctx->field < ST_I_INF_MAX; ctx->field++) { + int old_len; + int i = ctx->field; + + if (!field_format(info, i)) + continue; + + if (started && !chunk_strcat(out, ",")) + goto err; + started = 1; + + old_len = out->data; + chunk_appendf(out, + "{\"field\":{\"pos\":%d,\"name\":\"%s\"}," + "\"processNum\":%u,", + i, stat_cols_info[i].name, + info[ST_I_INF_PROCESS_NUM].u.u32); + if (old_len == out->data) + goto err; + + if (!stats_emit_json_field_tags(out, &info[i])) + goto err; + + if (!stats_emit_json_data_field(out, &info[i])) + goto err; + + if (!chunk_strcat(out, "}")) + goto err; + ready_data = out->data; + } + + if (!chunk_strcat(out, "]\n")) + goto err; + ctx->field = 0; /* we're done */ + return 1; + +err: + if (!ready_data) { + /* not enough buffer space for a single entry.. */ + chunk_reset(out); + chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n"); + return 0; /* hard error */ + } + /* push ready data and wait for a new buffer to complete the dump */ + out->data = ready_data; + return 1; +} + +/* This function dumps the schema onto the stream connector's read buffer. 
+ * It returns 0 as long as it does not complete, non-zero upon completion. + * No state is used. + * + * Integer values bounded to the range [-(2**53)+1, (2**53)-1] as + * per the recommendation for interoperable integers in section 6 of RFC 7159. + */ +void stats_dump_json_schema(struct buffer *out) +{ + + int old_len = out->data; + + chunk_strcat(out, + "{" + "\"$schema\":\"http://json-schema.org/draft-04/schema#\"," + "\"oneOf\":[" + "{" + "\"title\":\"Info\"," + "\"type\":\"array\"," + "\"items\":{" + "\"title\":\"InfoItem\"," + "\"type\":\"object\"," + "\"properties\":{" + "\"field\":{\"$ref\":\"#/definitions/field\"}," + "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," + "\"tags\":{\"$ref\":\"#/definitions/tags\"}," + "\"value\":{\"$ref\":\"#/definitions/typedValue\"}" + "}," + "\"required\":[\"field\",\"processNum\",\"tags\"," + "\"value\"]" + "}" + "}," + "{" + "\"title\":\"Stat\"," + "\"type\":\"array\"," + "\"items\":{" + "\"title\":\"InfoItem\"," + "\"type\":\"object\"," + "\"properties\":{" + "\"objType\":{" + "\"enum\":[\"Frontend\",\"Backend\",\"Listener\"," + "\"Server\",\"Unknown\"]" + "}," + "\"proxyId\":{" + "\"type\":\"integer\"," + "\"minimum\":0" + "}," + "\"id\":{" + "\"type\":\"integer\"," + "\"minimum\":0" + "}," + "\"field\":{\"$ref\":\"#/definitions/field\"}," + "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," + "\"tags\":{\"$ref\":\"#/definitions/tags\"}," + "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}" + "}," + "\"required\":[\"objType\",\"proxyId\",\"id\"," + "\"field\",\"processNum\",\"tags\"," + "\"value\"]" + "}" + "}," + "{" + "\"title\":\"Error\"," + "\"type\":\"object\"," + "\"properties\":{" + "\"errorStr\":{" + "\"type\":\"string\"" + "}" + "}," + "\"required\":[\"errorStr\"]" + "}" + "]," + "\"definitions\":{" + "\"field\":{" + "\"type\":\"object\"," + "\"pos\":{" + "\"type\":\"integer\"," + "\"minimum\":0" + "}," + "\"name\":{" + "\"type\":\"string\"" + "}," + "\"required\":[\"pos\",\"name\"]" + 
"}," + "\"processNum\":{" + "\"type\":\"integer\"," + "\"minimum\":1" + "}," + "\"tags\":{" + "\"type\":\"object\"," + "\"origin\":{" + "\"type\":\"string\"," + "\"enum\":[\"Metric\",\"Status\",\"Key\"," + "\"Config\",\"Product\",\"Unknown\"]" + "}," + "\"nature\":{" + "\"type\":\"string\"," + "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\"," + "\"Rate\",\"Counter\",\"Duration\"," + "\"Age\",\"Time\",\"Name\",\"Output\"," + "\"Avg\", \"Unknown\"]" + "}," + "\"scope\":{" + "\"type\":\"string\"," + "\"enum\":[\"Cluster\",\"Process\",\"Service\"," + "\"System\",\"Unknown\"]" + "}," + "\"required\":[\"origin\",\"nature\",\"scope\"]" + "}," + "\"typedValue\":{" + "\"type\":\"object\"," + "\"oneOf\":[" + "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"}," + "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"}," + "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"}," + "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"}," + "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}" + "]," + "\"definitions\":{" + "\"s32Value\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"string\"," + "\"enum\":[\"s32\"]" + "}," + "\"value\":{" + "\"type\":\"integer\"," + "\"minimum\":-2147483648," + "\"maximum\":2147483647" + "}" + "}," + "\"required\":[\"type\",\"value\"]" + "}," + "\"s64Value\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"string\"," + "\"enum\":[\"s64\"]" + "}," + "\"value\":{" + "\"type\":\"integer\"," + "\"minimum\":-9007199254740991," + "\"maximum\":9007199254740991" + "}" + "}," + "\"required\":[\"type\",\"value\"]" + "}," + "\"u32Value\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"string\"," + "\"enum\":[\"u32\"]" + "}," + "\"value\":{" + "\"type\":\"integer\"," + "\"minimum\":0," + "\"maximum\":4294967295" + "}" + "}," + "\"required\":[\"type\",\"value\"]" + "}," + "\"u64Value\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"string\"," + "\"enum\":[\"u64\"]" + "}," + "\"value\":{" + 
"\"type\":\"integer\"," + "\"minimum\":0," + "\"maximum\":9007199254740991" + "}" + "}," + "\"required\":[\"type\",\"value\"]" + "}," + "\"strValue\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"string\"," + "\"enum\":[\"str\"]" + "}," + "\"value\":{\"type\":\"string\"}" + "}," + "\"required\":[\"type\",\"value\"]" + "}," + "\"unknownValue\":{" + "\"properties\":{" + "\"type\":{" + "\"type\":\"integer\"," + "\"minimum\":0" + "}," + "\"value\":{" + "\"type\":\"string\"," + "\"enum\":[\"unknown\"]" + "}" + "}," + "\"required\":[\"type\",\"value\"]" + "}" + "}" + "}" + "}" + "}"); + + if (old_len == out->data) { + chunk_reset(out); + chunk_appendf(out, + "{\"errorStr\":\"output buffer too short\"}"); + } + chunk_appendf(out, "\n"); +} + +/* This function dumps the schema onto the stream connector's read buffer. + * It returns 0 as long as it does not complete, non-zero upon completion. + * No state is used. + */ +int stats_dump_json_schema_to_buffer(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + + chunk_reset(chk); + + stats_dump_json_schema(chk); + + if (applet_putchk(appctx, chk) == -1) + return 0; + + return 1; +} diff --git a/src/stats-proxy.c b/src/stats-proxy.c new file mode 100644 index 0000000..a158d87 --- /dev/null +++ b/src/stats-proxy.c @@ -0,0 +1,1686 @@ +#include <haproxy/stats-proxy.h> + +#include <errno.h> +#include <string.h> + +#include <haproxy/api.h> +#include <haproxy/backend.h> +#include <haproxy/check.h> +#include <haproxy/chunk.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/list.h> +#include <haproxy/listener.h> +#include <haproxy/obj_type.h> +#include <haproxy/proxy.h> +#include <haproxy/stats.h> +#include <haproxy/stats-html.h> +#include <haproxy/server.h> +#include <haproxy/stconn.h> +#include <haproxy/time.h> +#include <haproxy/tools.h> + +/* Define a new metric for both frontend and backend sides. 
*/ +#define ME_NEW_PX(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[0] = offsetof(struct fe_counters, offset_f), \ + .metric.offset[1] = offsetof(struct be_counters, offset_f), \ + .cap = (cap_f), \ + } + +/* Define a new metric for frontend side only. */ +#define ME_NEW_FE(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[0] = offsetof(struct fe_counters, offset_f), \ + .cap = (cap_f), \ + } + +/* Define a new metric for backend side only. */ +#define ME_NEW_BE(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[1] = offsetof(struct be_counters, offset_f), \ + .cap = (cap_f), \ + } + +const struct stat_col stat_cols_px[ST_I_PX_MAX] = { + [ST_I_PX_PXNAME] = { .name = "pxname", .desc = "Proxy name" }, + [ST_I_PX_SVNAME] = { .name = "svname", .desc = "Server name" }, + [ST_I_PX_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" }, + [ST_I_PX_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" }, + [ST_I_PX_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" }, + [ST_I_PX_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" }, + [ST_I_PX_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" }, + [ST_I_PX_STOT] = ME_NEW_PX("stot", FN_COUNTER, FF_U64, cum_sess, STATS_PX_CAP_LFBS, "Total number of sessions since process started"), + [ST_I_PX_BIN] = ME_NEW_PX("bin", FN_COUNTER, FF_U64, bytes_in, STATS_PX_CAP_LFBS, "Total number of request bytes since process started"), + [ST_I_PX_BOUT] = ME_NEW_PX("bout", FN_COUNTER, FF_U64, bytes_out, STATS_PX_CAP_LFBS, "Total number of response bytes since process 
started"), + [ST_I_PX_DREQ] = ME_NEW_PX("dreq", FN_COUNTER, FF_U64, denied_req, STATS_PX_CAP_LFB_, "Total number of denied requests since process started"), + [ST_I_PX_DRESP] = ME_NEW_PX("dresp", FN_COUNTER, FF_U64, denied_resp, STATS_PX_CAP_LFBS, "Total number of denied responses since process started"), + [ST_I_PX_EREQ] = ME_NEW_FE("ereq", FN_COUNTER, FF_U64, failed_req, STATS_PX_CAP_LF__, "Total number of invalid requests since process started"), + [ST_I_PX_ECON] = ME_NEW_BE("econ", FN_COUNTER, FF_U64, failed_conns, STATS_PX_CAP___BS, "Total number of failed connections to server since the worker process started"), + [ST_I_PX_ERESP] = ME_NEW_BE("eresp", FN_COUNTER, FF_U64, failed_resp, STATS_PX_CAP___BS, "Total number of invalid responses since the worker process started"), + [ST_I_PX_WRETR] = ME_NEW_BE("wretr", FN_COUNTER, FF_U64, retries, STATS_PX_CAP___BS, "Total number of server connection retries since the worker process started"), + [ST_I_PX_WREDIS] = ME_NEW_BE("wredis", FN_COUNTER, FF_U64, redispatches, STATS_PX_CAP___BS, "Total number of server redispatches due to connection failures since the worker process started"), + [ST_I_PX_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" }, + [ST_I_PX_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for a backend" }, + [ST_I_PX_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" }, + [ST_I_PX_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" }, + [ST_I_PX_CHKFAIL] = ME_NEW_BE("chkfail", FN_COUNTER, FF_U64, failed_checks, STATS_PX_CAP____S, "Total number of failed individual health checks per server/backend, since the worker process started"), + [ST_I_PX_CHKDOWN] = ME_NEW_BE("chkdown", FN_COUNTER, FF_U64, down_trans, STATS_PX_CAP___BS, "Total number of failed checks causing UP to DOWN server 
transitions, per server/backend, since the worker process started"), + [ST_I_PX_LASTCHG] = ME_NEW_BE("lastchg", FN_AGE, FF_U32, last_change, STATS_PX_CAP___BS, "How long ago the last server state changed, in seconds"), + [ST_I_PX_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" }, + [ST_I_PX_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" }, + [ST_I_PX_PID] = { .name = "pid", .desc = "Relative worker process number (1)" }, + [ST_I_PX_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" }, + [ST_I_PX_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" }, + [ST_I_PX_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" }, + [ST_I_PX_LBTOT] = ME_NEW_BE("lbtot", FN_COUNTER, FF_U64, cum_lbconn, STATS_PX_CAP_LFBS, "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)"), + [ST_I_PX_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" }, + [ST_I_PX_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" }, + [ST_I_PX_RATE] = ME_NEW_PX("rate", FN_RATE, FF_U32, sess_per_sec, STATS_PX_CAP__FBS, "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)"), + [ST_I_PX_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend only, 'rate-limit sessions' setting)" }, + [ST_I_PX_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" }, + [ST_I_PX_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is 
currently in progress" }, + [ST_I_PX_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" }, + [ST_I_PX_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" }, + [ST_I_PX_HRSP_1XX] = ME_NEW_PX("hrsp_1xx", FN_COUNTER, FF_U64, p.http.rsp[1], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 100-199 returned by this object since the worker process started"), + [ST_I_PX_HRSP_2XX] = ME_NEW_PX("hrsp_2xx", FN_COUNTER, FF_U64, p.http.rsp[2], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 200-299 returned by this object since the worker process started"), + [ST_I_PX_HRSP_3XX] = ME_NEW_PX("hrsp_3xx", FN_COUNTER, FF_U64, p.http.rsp[3], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 300-399 returned by this object since the worker process started"), + [ST_I_PX_HRSP_4XX] = ME_NEW_PX("hrsp_4xx", FN_COUNTER, FF_U64, p.http.rsp[4], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 400-499 returned by this object since the worker process started"), + [ST_I_PX_HRSP_5XX] = ME_NEW_PX("hrsp_5xx", FN_COUNTER, FF_U64, p.http.rsp[5], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 500-599 returned by this object since the worker process started"), + [ST_I_PX_HRSP_OTHER] = ME_NEW_PX("hrsp_other", FN_COUNTER, FF_U64, p.http.rsp[0], STATS_PX_CAP__FBS, "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)"), + [ST_I_PX_HANAFAIL] = ME_NEW_BE("hanafail", FN_COUNTER, FF_U64, failed_hana, STATS_PX_CAP____S, "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched"), + [ST_I_PX_REQ_RATE] = ME_NEW_FE("req_rate", FN_RATE, FF_U32, req_per_sec, STATS_PX_CAP__F__, "Number of HTTP requests processed over the last second on this object"), + [ST_I_PX_REQ_RATE_MAX] = { .name = 
"req_rate_max", .desc = "Highest value of http requests observed since the worker process started" }, + /* Note: ST_I_PX_REQ_TOT is also diplayed on frontend but does not uses a raw counter value, see me_generate_field() for details. */ + [ST_I_PX_REQ_TOT] = ME_NEW_BE("req_tot", FN_COUNTER, FF_U64, p.http.cum_req, STATS_PX_CAP___BS, "Total number of HTTP requests processed by this object since the worker process started"), + [ST_I_PX_CLI_ABRT] = ME_NEW_BE("cli_abrt", FN_COUNTER, FF_U64, cli_aborts, STATS_PX_CAP_LFBS, "Total number of requests or connections aborted by the client since the worker process started"), + [ST_I_PX_SRV_ABRT] = ME_NEW_BE("srv_abrt", FN_COUNTER, FF_U64, srv_aborts, STATS_PX_CAP_LFBS, "Total number of requests or connections aborted by the server since the worker process started"), + [ST_I_PX_COMP_IN] = ME_NEW_PX("comp_in", FN_COUNTER, FF_U64, comp_in[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes submitted to the HTTP compressor for this object since the worker process started"), + [ST_I_PX_COMP_OUT] = ME_NEW_PX("comp_out", FN_COUNTER, FF_U64, comp_out[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes emitted by the HTTP compressor for this object since the worker process started"), + [ST_I_PX_COMP_BYP] = ME_NEW_PX("comp_byp", FN_COUNTER, FF_U64, comp_byp[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)"), + [ST_I_PX_COMP_RSP] = ME_NEW_PX("comp_rsp", FN_COUNTER, FF_U64, p.http.comp_rsp, STATS_PX_CAP__FB_, "Total number of HTTP responses that were compressed for this object since the worker process started"), + [ST_I_PX_LASTSESS] = ME_NEW_BE("lastsess", FN_AGE, FF_S32, last_sess, STATS_PX_CAP___BS, "How long ago some traffic was seen on this object on this worker process, in seconds"), + [ST_I_PX_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server 
(see also check_desc)" }, + [ST_I_PX_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" }, + [ST_I_PX_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" }, + [ST_I_PX_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" }, + [ST_I_PX_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" }, + [ST_I_PX_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" }, + [ST_I_PX_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" }, + [ST_I_PX_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" }, + [ST_I_PX_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" }, + [ST_I_PX_CHECK_HEALTH] = { .name = "check_health", .desc = "Current server health check level (0..fall-1=DOWN, fall..rise-1=UP)" }, + [ST_I_PX_AGENT_RISE] = { 
.name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" }, + [ST_I_PX_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" }, + [ST_I_PX_AGENT_HEALTH] = { .name = "agent_health", .desc = "Current server agent check level (0..fall-1=DOWN, fall..rise-1=UP)" }, + [ST_I_PX_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" }, + [ST_I_PX_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_CONN_RATE] = ME_NEW_FE("conn_rate", FN_RATE, FF_U32, conn_per_sec, STATS_PX_CAP__F__, "Number of new connections accepted over the last second on the frontend for this worker process"), + [ST_I_PX_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" }, + [ST_I_PX_CONN_TOT] = ME_NEW_FE("conn_tot", FN_COUNTER, FF_U64, cum_conn, STATS_PX_CAP_LF__, "Total number of new connections accepted on this frontend since the worker process started"), + [ST_I_PX_INTERCEPTED] = ME_NEW_FE("intercepted", FN_COUNTER, FF_U64, intercepted_req, STATS_PX_CAP__F__, "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started"), + [ST_I_PX_DCON] = ME_NEW_FE("dcon", FN_COUNTER, FF_U64, denied_conn, STATS_PX_CAP_LF__, "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started"), + [ST_I_PX_DSES] = ME_NEW_FE("dses", 
FN_COUNTER, FF_U64, denied_sess, STATS_PX_CAP_LF__, "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started"), + [ST_I_PX_WREW] = ME_NEW_PX("wrew", FN_COUNTER, FF_U64, failed_rewrites, STATS_PX_CAP_LFBS, "Total number of failed HTTP header rewrites since the worker process started"), + [ST_I_PX_CONNECT] = ME_NEW_BE("connect", FN_COUNTER, FF_U64, connect, STATS_PX_CAP___BS, "Total number of outgoing connection attempts on this backend/server since the worker process started"), + [ST_I_PX_REUSE] = ME_NEW_BE("reuse", FN_COUNTER, FF_U64, reuse, STATS_PX_CAP___BS, "Total number of reused connection on this backend/server since the worker process started"), + [ST_I_PX_CACHE_LOOKUPS] = ME_NEW_PX("cache_lookups", FN_COUNTER, FF_U64, p.http.cache_lookups, STATS_PX_CAP__FB_, "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started"), + [ST_I_PX_CACHE_HITS] = ME_NEW_PX("cache_hits", FN_COUNTER, FF_U64, p.http.cache_hits, STATS_PX_CAP__FB_, "Total number of HTTP requests not found in the cache on this frontend/backend since the worker process started"), + [ST_I_PX_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" }, + [ST_I_PX_SRV_ILIM] = { .name = "src_ilim", .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" }, + [ST_I_PX_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" }, + [ST_I_PX_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" }, + [ST_I_PX_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" }, + [ST_I_PX_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total 
request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" }, + [ST_I_PX_EINT] = ME_NEW_PX("eint", FN_COUNTER, FF_U64, internal_errors, STATS_PX_CAP_LFBS, "Total number of internal errors since process started"), + [ST_I_PX_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections"}, + [ST_I_PX_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections"}, + [ST_I_PX_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"}, + [ST_I_PX_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"}, + [ST_I_PX_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" }, + [ST_I_PX_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "[DEPRECATED] Backend's aggregated gauge of servers' status" }, + [ST_I_PX_AGG_SRV_STATUS ] = { .name = "agg_server_status", .desc = "Backend's aggregated gauge of servers' status" }, + [ST_I_PX_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" }, + [ST_I_PX_SRID] = { .name = "srid", .desc = "Server id revision, to prevent server id reuse mixups" }, + [ST_I_PX_SESS_OTHER] = { .name = "sess_other", .desc = "Total number of sessions other than HTTP since process started" }, + [ST_I_PX_H1SESS] = ME_NEW_FE("h1sess", FN_COUNTER, FF_U64, cum_sess_ver[0], STATS_PX_CAP__F__, "Total number of HTTP/1 sessions since process started"), + [ST_I_PX_H2SESS] = ME_NEW_FE("h2sess", FN_COUNTER, FF_U64, cum_sess_ver[1], STATS_PX_CAP__F__, "Total number of HTTP/2 sessions since process started"), + [ST_I_PX_H3SESS] = ME_NEW_FE("h3sess", FN_COUNTER, FF_U64, cum_sess_ver[2], STATS_PX_CAP__F__, "Total number of HTTP/3 sessions since process started"), + [ST_I_PX_REQ_OTHER] = ME_NEW_FE("req_other", FN_COUNTER, FF_U64, p.http.cum_req[0], 
STATS_PX_CAP__F__, "Total number of sessions other than HTTP processed by this object since the worker process started"), + [ST_I_PX_H1REQ] = ME_NEW_FE("h1req", FN_COUNTER, FF_U64, p.http.cum_req[1], STATS_PX_CAP__F__, "Total number of HTTP/1 sessions processed by this object since the worker process started"), + [ST_I_PX_H2REQ] = ME_NEW_FE("h2req", FN_COUNTER, FF_U64, p.http.cum_req[2], STATS_PX_CAP__F__, "Total number of hTTP/2 sessions processed by this object since the worker process started"), + [ST_I_PX_H3REQ] = ME_NEW_FE("h3req", FN_COUNTER, FF_U64, p.http.cum_req[3], STATS_PX_CAP__F__, "Total number of HTTP/3 sessions processed by this object since the worker process started"), + [ST_I_PX_PROTO] = { .name = "proto", .desc = "Protocol" }, +}; + +/* Returns true if column at <idx> should be hidden. + * This may depends on various <objt> internal status. + */ +static int stcol_hide(enum stat_idx_px idx, enum obj_type *objt) +{ + struct proxy *px; + struct server *srv = NULL, *ref; + struct listener *li = NULL; + + switch (obj_type(objt)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(objt); + break; + case OBJ_TYPE_SERVER: + srv = __objt_server(objt); + px = srv->proxy; + break; + case OBJ_TYPE_LISTENER: + li = __objt_listener(objt); + px = li->bind_conf->frontend; + break; + default: + ABORT_NOW(); + return 0; + } + + switch (idx) { + case ST_I_PX_HRSP_1XX: + case ST_I_PX_HRSP_2XX: + case ST_I_PX_HRSP_3XX: + case ST_I_PX_HRSP_4XX: + case ST_I_PX_HRSP_5XX: + case ST_I_PX_REQ_TOT: + case ST_I_PX_INTERCEPTED: + case ST_I_PX_CACHE_LOOKUPS: + case ST_I_PX_CACHE_HITS: + return px->mode != PR_MODE_HTTP; + + case ST_I_PX_CHKFAIL: + case ST_I_PX_CHKDOWN: + return srv && !(srv->check.state & CHK_ST_ENABLED); + + case ST_I_PX_HANAFAIL: + BUG_ON(!srv); /* HANAFAIL is only defined for server scope */ + + ref = srv->track ? 
srv->track : srv; + while (ref->track) + ref = ref->track; + return !ref->observe; + + case ST_I_PX_LASTSESS: + if (srv) + return !srv->counters.last_sess; + else if (px) + return !px->be_counters.last_sess; + else + return 0; + + default: + return 0; + } +} + +/* Generate if possible a metric value from <col>. <cap> must be set to one of + * STATS_PX_CAP_* values to check if the metric is available for this object + * type. <stat_file> must be set when dumping stats-file. Metric value will be + * extracted from <counters>. + * + * Returns a field metric. + */ +static struct field me_generate_field(const struct stat_col *col, + enum stat_idx_px idx, enum obj_type *objt, + const void *counters, uint8_t cap, + int stat_file) +{ + enum field_nature fn; + struct field value; + void *counter = NULL; + int wrong_side = 0; + + /* Only generic stat column must be used as input. */ + BUG_ON(!stcol_is_generic(col)); + + fn = stcol_nature(col); + + switch (cap) { + case STATS_PX_CAP_FE: + case STATS_PX_CAP_LI: + counter = (char *)counters + col->metric.offset[0]; + wrong_side = !(col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI)); + break; + + case STATS_PX_CAP_BE: + case STATS_PX_CAP_SRV: + counter = (char *)counters + col->metric.offset[1]; + wrong_side = !(col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)); + break; + + default: + /* invalid cap requested */ + ABORT_NOW(); + } + + /* TODO Special case needed for ST_I_PX_REQ_TOT. It is defined as a + * generic column for backend side. Extra code required to diplay it on + * frontend side as an aggregate of values splitted by HTTP version. 
+ */ + if (idx == ST_I_PX_REQ_TOT && cap == STATS_PX_CAP_FE && !stat_file) { + struct proxy *px = __objt_proxy(objt); + const size_t nb_reqs = + sizeof(px->fe_counters.p.http.cum_req) / + sizeof(*px->fe_counters.p.http.cum_req); + uint64_t total_req = 0; + int i; + + for (i = 0; i < nb_reqs; i++) + total_req += px->fe_counters.p.http.cum_req[i]; + return mkf_u64(FN_COUNTER, total_req); + } + + if (stat_file) { + /* stats-file emits separately frontend and backend stats. + * Skip metric if not defined for any object on the cap side. + */ + if (wrong_side) + return (struct field){ .type = FF_EMPTY }; + } + else { + /* Ensure metric is defined for the current cap. */ + if (!(col->cap & cap)) + return (struct field){ .type = FF_EMPTY }; + + if (stcol_hide(idx, objt)) { + if (fn == FN_AGE) + return mkf_s32(FN_AGE, -1); + else + return (struct field){ .type = FF_EMPTY }; + } + } + + if (fn == FN_COUNTER) { + switch (stcol_format(col)) { + case FF_U64: + value = mkf_u64(FN_COUNTER, *(uint64_t *)counter); + break; + default: + /* only FF_U64 counters currently use generic metric calculation */ + ABORT_NOW(); + } + } + else if (fn == FN_RATE) { + /* freq-ctr always uses FF_U32 */ + BUG_ON(stcol_format(col) != FF_U32); + value = mkf_u32(FN_RATE, read_freq_ctr(counter)); + } + else if (fn == FN_AGE) { + unsigned long age = *(unsigned long *)counter; + if (age) + age = ns_to_sec(now_ns) - age; + + switch (stcol_format(col)) { + case FF_U32: + value = mkf_u32(FN_AGE, age); + break; + case FF_S32: + value = mkf_s32(FN_AGE, age); + break; + default: + /* only FF_U32/FF+S32 for age as generic stat column */ + ABORT_NOW(); + } + } + else { + /* No generic column available for other field nature. */ + ABORT_NOW(); + } + + return value; +} + +/* Fill <line> with the frontend statistics. <line> is preallocated array of + * length <len>. If <index> is != NULL, only fill this one. The length + * of the array must be at least ST_I_PX_MAX. 
If this length is less than + * this value, or if the selected field is not implemented for frontends, the + * function returns 0, otherwise, it returns 1. + */ +int stats_fill_fe_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + + if (len < ST_I_PX_MAX) + return 0; + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &px->obj_type, + &px->fe_counters, STATS_PX_CAP_FE, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND"); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, px->feconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, px->fe_counters.conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn); + break; + case ST_I_PX_STATUS: { + const char *state; + + if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) + state = "STOP"; + else if (px->flags & PR_FL_PAUSED) + state = "PAUSED"; + else + state = "OPEN"; + field = mkf_str(FO_STATUS, state); + break; + } + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, 0); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE); + break; + case ST_I_PX_RATE_LIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.sps_max); + break; + case ST_I_PX_REQ_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max); + break; + case 
ST_I_PX_CONN_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.cps_max); + break; + case ST_I_PX_SESS_OTHER: { + int i; + uint64_t total_sess; + size_t nb_sess = + sizeof(px->fe_counters.cum_sess_ver) / sizeof(*px->fe_counters.cum_sess_ver); + + total_sess = px->fe_counters.cum_sess; + for (i = 0; i < nb_sess; i++) + total_sess -= px->fe_counters.cum_sess_ver[i]; + total_sess = (int64_t)total_sess < 0 ? 0 : total_sess; + field = mkf_u64(FN_COUNTER, total_sess); + break; + } + default: + /* not used for frontends. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a frontend's line to chunk ctx buffer for the current proxy <px> and + * uses the state from stream connector <sc>. The caller is responsible for + * clearing chunk ctx buffer if needed. Returns non-zero if it emits anything, + * zero otherwise. + */ +static int stats_dump_fe_line(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + if (!(px->cap & PR_CAP_FE)) + return 0; + + if ((ctx->flags & STAT_F_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE))) + return 0; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_fe_line(px, ctx->flags, line, ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return 
stats_dump_one_line(line, stats_count, appctx); +} + +/* Fill <line> with the listener statistics. <line> is preallocated array of + * length <len>. The length of the array must be at least ST_I_PX_MAX. If + * this length is less then this value, the function returns 0, otherwise, it + * returns 1. If selected_field is != NULL, only fill this one. <flags> can + * take the value STAT_F_SHLGNDS. + */ +int stats_fill_li_line(struct proxy *px, struct listener *l, int flags, + struct field *line, int len, enum stat_idx_px *selected_field) +{ + enum stat_idx_px i = (selected_field != NULL ? *selected_field : 0); + struct buffer *out = get_trash_chunk(); + + if (len < ST_I_PX_MAX) + return 0; + + if (!l->counters) + return 0; + + chunk_reset(out); + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &l->obj_type, + l->counters, STATS_PX_CAP_LI, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, l->nbconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, l->counters->conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, l->bind_conf->maxconn); + break; + case ST_I_PX_STATUS: + field = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, l->luid); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO); + break; + case ST_I_PX_ADDR: + if (flags & 
STAT_F_SHLGNDS) { + char str[INET6_ADDRSTRLEN]; + int port; + + port = get_host_port(&l->rx.addr); + switch (addr_to_str(&l->rx.addr, str, sizeof(str))) { + case AF_INET: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "%s:%d", str, port); + break; + case AF_INET6: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "[%s]:%d", str, port); + break; + case AF_UNIX: + field = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); + break; + case -1: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_strcat(out, strerror(errno)); + break; + default: /* address family not supported */ + break; + } + } + break; + case ST_I_PX_PROTO: + field = mkf_str(FO_STATUS, l->rx.proto->name); + break; + default: + /* not used for listen. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + } + line[i] = field; + if (selected_field != NULL) + break; + } + return 1; +} + +/* Dumps a line for listener <l> and proxy <px> to chunk ctx buffer and uses + * the state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. Returns non-zero if it emits anything, zero + * otherwise. 
+ */ +static int stats_dump_li_line(struct stconn *sc, struct proxy *px, struct listener *l) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_li_line(px, l, ctx->flags, line, + ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(l->extra_counters, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +enum srv_stats_state { + SRV_STATS_STATE_DOWN = 0, + SRV_STATS_STATE_DOWN_AGENT, + SRV_STATS_STATE_GOING_UP, + SRV_STATS_STATE_UP_GOING_DOWN, + SRV_STATS_STATE_UP, + SRV_STATS_STATE_NOLB_GOING_DOWN, + SRV_STATS_STATE_NOLB, + SRV_STATS_STATE_DRAIN_GOING_DOWN, + SRV_STATS_STATE_DRAIN, + SRV_STATS_STATE_DRAIN_AGENT, + SRV_STATS_STATE_NO_CHECK, + + SRV_STATS_STATE_COUNT, /* Must be last */ +}; + +static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = { + [SRV_STATS_STATE_DOWN] = "DOWN", + [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)", + [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d", + [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d", + [SRV_STATS_STATE_UP] = "UP", + [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d", + [SRV_STATS_STATE_NOLB] = "NOLB", + [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d", + [SRV_STATS_STATE_DRAIN] = "DRAIN", + [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)", + [SRV_STATS_STATE_NO_CHECK] = "no check" +}; + +/* Compute server state helper + */ +static void stats_fill_sv_computestate(struct server 
*sv, struct server *ref, + enum srv_stats_state *state) +{ + if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) { + if ((ref->check.state & CHK_ST_ENABLED) && + (ref->check.health < ref->check.rise + ref->check.fall - 1)) { + *state = SRV_STATS_STATE_UP_GOING_DOWN; + } else { + *state = SRV_STATS_STATE_UP; + } + + if (sv->cur_admin & SRV_ADMF_DRAIN) { + if (ref->agent.state & CHK_ST_ENABLED) + *state = SRV_STATS_STATE_DRAIN_AGENT; + else if (*state == SRV_STATS_STATE_UP_GOING_DOWN) + *state = SRV_STATS_STATE_DRAIN_GOING_DOWN; + else + *state = SRV_STATS_STATE_DRAIN; + } + + if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) { + *state = SRV_STATS_STATE_NO_CHECK; + } + } + else if (sv->cur_state == SRV_ST_STOPPING) { + if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) || + (ref->check.health == ref->check.rise + ref->check.fall - 1)) { + *state = SRV_STATS_STATE_NOLB; + } else { + *state = SRV_STATS_STATE_NOLB_GOING_DOWN; + } + } + else { /* stopped */ + if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) { + *state = SRV_STATS_STATE_DOWN_AGENT; + } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) { + *state = SRV_STATS_STATE_DOWN; /* DOWN */ + } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) { + *state = SRV_STATS_STATE_GOING_UP; + } else { + *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */ + } + } +} + +/* Fill <line> with the backend statistics. <line> is preallocated array of + * length <len>. If <selected_field> is != NULL, only fill this one. The length + * of the array must be at least ST_I_PX_MAX. If this length is less than + * this value, or if the selected field is not implemented for servers, the + * function returns 0, otherwise, it returns 1. <flags> can take the value + * STAT_F_SHLGNDS. 
+ */ +int stats_fill_sv_line(struct proxy *px, struct server *sv, int flags, + struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + struct server *via = sv->track ? sv->track : sv; + struct server *ref = via; + enum srv_stats_state state = 0; + char str[INET6_ADDRSTRLEN]; + struct buffer *out = get_trash_chunk(); + char *fld_status; + long long srv_samples_counter; + unsigned int srv_samples_window = TIME_STATS_SAMPLES; + + if (len < ST_I_PX_MAX) + return 0; + + chunk_reset(out); + + /* compute state for later use */ + if (!index || *index == ST_I_PX_STATUS || + *index == ST_I_PX_CHECK_RISE || *index == ST_I_PX_CHECK_FALL || + *index == ST_I_PX_CHECK_HEALTH || *index == ST_I_PX_HANAFAIL) { + /* we have "via" which is the tracked server as described in the configuration, + * and "ref" which is the checked server and the end of the chain. + */ + while (ref->track) + ref = ref->track; + stats_fill_sv_computestate(sv, ref, &state); + } + + /* compue time values for later use */ + if (index == NULL || *index == ST_I_PX_QTIME || + *index == ST_I_PX_CTIME || *index == ST_I_PX_RTIME || + *index == ST_I_PX_TTIME) { + srv_samples_counter = (px->mode == PR_MODE_HTTP) ? 
sv->counters.p.http.cum_req : sv->counters.cum_lbconn; + if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0) + srv_samples_window = srv_samples_counter; + } + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &sv->obj_type, + &sv->counters, STATS_PX_CAP_SRV, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_QCUR: + field = mkf_u32(0, sv->queue.length); + break; + case ST_I_PX_QMAX: + field = mkf_u32(FN_MAX, sv->counters.nbpend_max); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, sv->cur_sess); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, sv->counters.cur_sess_max); + break; + case ST_I_PX_SLIM: + if (sv->maxconn) + field = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn); + break; + case ST_I_PX_SRV_ICUR: + field = mkf_u32(0, sv->curr_idle_conns); + break; + case ST_I_PX_SRV_ILIM: + if (sv->max_idle_conns != -1) + field = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns); + break; + case ST_I_PX_IDLE_CONN_CUR: + field = mkf_u32(0, sv->curr_idle_nb); + break; + case ST_I_PX_SAFE_CONN_CUR: + field = mkf_u32(0, sv->curr_safe_nb); + break; + case ST_I_PX_USED_CONN_CUR: + field = mkf_u32(0, sv->curr_used_conns); + break; + case ST_I_PX_NEED_CONN_EST: + field = mkf_u32(0, sv->est_need_conns); + break; + case ST_I_PX_STATUS: + fld_status = chunk_newstr(out); + if (sv->cur_admin & SRV_ADMF_RMAINT) + chunk_appendf(out, "MAINT (resolution)"); + else if (sv->cur_admin & SRV_ADMF_IMAINT) + chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id); + else if (sv->cur_admin & SRV_ADMF_MAINT) + 
chunk_appendf(out, "MAINT"); + else + chunk_appendf(out, + srv_hlt_st[state], + (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), + (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise)); + + field = mkf_str(FO_STATUS, fld_status); + break; + case ST_I_PX_WEIGHT: + field = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); + break; + case ST_I_PX_UWEIGHT: + field = mkf_u32(FN_AVG, sv->uweight); + break; + case ST_I_PX_ACT: + field = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1); + break; + case ST_I_PX_BCK: + field = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 1 : 0); + break; + case ST_I_PX_DOWNTIME: + if (sv->check.state & CHK_ST_ENABLED) + field = mkf_u32(FN_COUNTER, srv_downtime(sv)); + break; + case ST_I_PX_QLIMIT: + if (sv->maxqueue) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, sv->puid); + break; + case ST_I_PX_SRID: + field = mkf_u32(FN_COUNTER, sv->rid); + break; + case ST_I_PX_THROTTLE: + if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv)) + field = mkf_u32(FN_AVG, server_throttle_rate(sv)); + break; + case ST_I_PX_TRACKED: + if (sv->track) { + char *fld_track = chunk_newstr(out); + chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id); + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track); + } + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(FN_MAX, sv->counters.sps_max); + break; + case ST_I_PX_CHECK_STATUS: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { + const char *fld_chksts; + + fld_chksts = chunk_newstr(out); + chunk_strcat(out, "* "); // for check in progress + 
chunk_strcat(out, get_check_status_info(sv->check.status)); + if (!(sv->check.state & CHK_ST_INPROGRESS)) + fld_chksts += 2; // skip "* " + field = mkf_str(FN_OUTPUT, fld_chksts); + } + break; + case ST_I_PX_CHECK_CODE: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + sv->check.status >= HCHK_STATUS_L57DATA) + field = mkf_u32(FN_OUTPUT, sv->check.code); + break; + case ST_I_PX_CHECK_DURATION: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + sv->check.status >= HCHK_STATUS_CHECKED) + field = mkf_u64(FN_DURATION, MAX(sv->check.duration, 0)); + break; + case ST_I_PX_CHECK_DESC: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status)); + break; + case ST_I_PX_LAST_CHK: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, sv->check.desc); + break; + case ST_I_PX_CHECK_RISE: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise); + break; + case ST_I_PX_CHECK_FALL: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall); + break; + case ST_I_PX_CHECK_HEALTH: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health); + break; + case ST_I_PX_AGENT_STATUS: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { + const char *fld_chksts; + + fld_chksts = chunk_newstr(out); + chunk_strcat(out, "* "); // for check in progress + chunk_strcat(out, get_check_status_info(sv->agent.status)); + if (!(sv->agent.state & CHK_ST_INPROGRESS)) + fld_chksts += 2; // skip "* " + field = mkf_str(FN_OUTPUT, fld_chksts); + } + break; + case ST_I_PX_AGENT_CODE: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + 
(sv->agent.status >= HCHK_STATUS_L57DATA)) + field = mkf_u32(FN_OUTPUT, sv->agent.code); + break; + case ST_I_PX_AGENT_DURATION: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u64(FN_DURATION, sv->agent.duration); + break; + case ST_I_PX_AGENT_DESC: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status)); + break; + case ST_I_PX_LAST_AGT: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, sv->agent.desc); + break; + case ST_I_PX_AGENT_RISE: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise); + break; + case ST_I_PX_AGENT_FALL: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall); + break; + case ST_I_PX_AGENT_HEALTH: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health); + break; + case ST_I_PX_QTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window)); + break; + case ST_I_PX_CTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window)); + break; + case ST_I_PX_RTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window)); + break; + case ST_I_PX_TTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window)); + break; + case ST_I_PX_QT_MAX: + field = mkf_u32(FN_MAX, sv->counters.qtime_max); + break; + case ST_I_PX_CT_MAX: + field = mkf_u32(FN_MAX, sv->counters.ctime_max); + break; + case ST_I_PX_RT_MAX: + field = mkf_u32(FN_MAX, sv->counters.dtime_max); + break; + case ST_I_PX_TT_MAX: + field = mkf_u32(FN_MAX, sv->counters.ttime_max); + break; + case ST_I_PX_ADDR: + if (flags & STAT_F_SHLGNDS) { + switch (addr_to_str(&sv->addr, str, 
sizeof(str))) { + case AF_INET: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "%s:%d", str, sv->svc_port); + break; + case AF_INET6: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "[%s]:%d", str, sv->svc_port); + break; + case AF_UNIX: + field = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); + break; + case -1: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_strcat(out, strerror(errno)); + break; + default: /* address family not supported */ + break; + } + } + break; + case ST_I_PX_COOKIE: + if (flags & STAT_F_SHLGNDS && sv->cookie) + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie); + break; + default: + /* not used for servers. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a line for server <sv> and proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>, and server state <state>. The caller is + * responsible for clearing the chunk ctx buffer if needed. Returns non-zero if + * it emits anything, zero otherwise. 
+ */ +static int stats_dump_sv_line(struct stconn *sc, struct proxy *px, struct server *sv) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct stats_module *mod; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + size_t stats_count = ST_I_PX_MAX; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_sv_line(px, sv, ctx->flags, line, + ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +/* Helper to compute srv values for a given backend + */ +static void stats_fill_be_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw) +{ + int nbup_tmp, nbsrv_tmp, totuw_tmp; + const struct server *srv; + + nbup_tmp = nbsrv_tmp = totuw_tmp = 0; + for (srv = px->srv; srv; srv = srv->next) { + if (srv->cur_state != SRV_ST_STOPPED) { + nbup_tmp++; + if (srv_currently_usable(srv) && + (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP))) + totuw_tmp += srv->uweight; + } + nbsrv_tmp++; + } + + HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); + if (!px->srv_act && px->lbprm.fbck) + totuw_tmp = px->lbprm.fbck->uweight; + HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); + + /* use tmp variable then assign result to make gcc happy */ + *nbup = nbup_tmp; + *nbsrv = nbsrv_tmp; + *totuw = totuw_tmp; +} + +/* Fill <line> with the backend statistics. <line> is preallocated array of + * length <len>. If <index> is != NULL, only fill this one. 
The length + * of the array must be at least ST_I_PX_MAX. If this length is less than + * this value, or if the selected field is not implemented for backends, the + * function returns 0, otherwise, it returns 1. <flags> can take the value + * STAT_F_SHLGNDS. + */ +int stats_fill_be_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + long long be_samples_counter; + unsigned int be_samples_window = TIME_STATS_SAMPLES; + struct buffer *out = get_trash_chunk(); + int nbup, nbsrv, totuw; + char *fld; + + if (len < ST_I_PX_MAX) + return 0; + + nbup = nbsrv = totuw = 0; + /* some srv values compute for later if we either select all fields or + * need them for one of the mentioned ones */ + if (!index || *index == ST_I_PX_STATUS || + *index == ST_I_PX_UWEIGHT) + stats_fill_be_computesrv(px, &nbup, &nbsrv, &totuw); + + /* same here but specific to time fields */ + if (!index || *index == ST_I_PX_QTIME || + *index == ST_I_PX_CTIME || *index == ST_I_PX_RTIME || + *index == ST_I_PX_TTIME) { + be_samples_counter = (px->mode == PR_MODE_HTTP) ? 
px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn; + if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0) + be_samples_window = be_samples_counter; + } + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &px->obj_type, + &px->be_counters, STATS_PX_CAP_BE, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND"); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_QCUR: + field = mkf_u32(0, px->queue.length); + break; + case ST_I_PX_QMAX: + field = mkf_u32(FN_MAX, px->be_counters.nbpend_max); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, px->beconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, px->be_counters.conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn); + break; + case ST_I_PX_STATUS: + fld = chunk_newstr(out); + chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? 
"UP" : "DOWN"); + if (flags & (STAT_F_HIDE_MAINT|STAT_F_HIDE_DOWN)) + chunk_appendf(out, " (%d/%d)", nbup, nbsrv); + field = mkf_str(FO_STATUS, fld); + break; + case ST_I_PX_AGG_SRV_CHECK_STATUS: // DEPRECATED + case ST_I_PX_AGG_SRV_STATUS: + field = mkf_u32(FN_GAUGE, 0); + break; + case ST_I_PX_AGG_CHECK_STATUS: + field = mkf_u32(FN_GAUGE, 0); + break; + case ST_I_PX_WEIGHT: + field = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); + break; + case ST_I_PX_UWEIGHT: + field = mkf_u32(FN_AVG, totuw); + break; + case ST_I_PX_ACT: + field = mkf_u32(0, px->srv_act); + break; + case ST_I_PX_BCK: + field = mkf_u32(0, px->srv_bck); + break; + case ST_I_PX_DOWNTIME: + if (px->srv) + field = mkf_u32(FN_COUNTER, be_downtime(px)); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, 0); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(0, px->be_counters.sps_max); + break; + case ST_I_PX_COOKIE: + if (flags & STAT_F_SHLGNDS && px->cookie_name) + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name); + break; + case ST_I_PX_ALGO: + if (flags & STAT_F_SHLGNDS) + field = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO)); + break; + case ST_I_PX_QTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window)); + break; + case ST_I_PX_CTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, be_samples_window)); + break; + case ST_I_PX_RTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window)); + break; + case ST_I_PX_TTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window)); + break; + case ST_I_PX_QT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.qtime_max); + break; + 
case ST_I_PX_CT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.ctime_max); + break; + case ST_I_PX_RT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.dtime_max); + break; + case ST_I_PX_TT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.ttime_max); + break; + default: + /* not used for backends. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a line for backend <px> to chunk ctx buffer and uses the state from + * stream interface <si>. The caller is responsible for clearing chunk buffer + * if needed. Returns non-zero if it emits anything, zero otherwise. + */ +static int stats_dump_be_line(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + if (!(px->cap & PR_CAP_BE)) + return 0; + + if ((ctx->flags & STAT_F_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE))) + return 0; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_be_line(px, ctx->flags, line, ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + struct extra_counters *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +/* + * Dumps statistics for a proxy. The output is sent to the stream connector's + * input buffer. 
Returns 0 if it had to stop dumping data because of lack of + * buffer space, or non-zero if everything completed. This function is used + * both by the CLI and the HTTP entry points, and is able to dump the output + * in HTML or CSV formats. + */ +static int stats_dump_proxy_to_buffer(struct stconn *sc, struct buffer *buf, + struct htx *htx, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + struct server *sv, *svs; /* server and server-state, server-state=server or server->track */ + struct listener *l; + struct uri_auth *uri = NULL; + int current_field; + int px_st = ctx->px_st; + + if (ctx->http_px) + uri = ctx->http_px->uri_auth; + chunk_reset(chk); +more: + current_field = ctx->field; + + switch (ctx->px_st) { + case STAT_PX_ST_INIT: + /* we are on a new proxy */ + if (uri && uri->scope) { + /* we have a limited scope, we have to check the proxy name */ + struct stat_scope *scope; + int len; + + len = strlen(px->id); + scope = uri->scope; + + while (scope) { + /* match exact proxy name */ + if (scope->px_len == len && !memcmp(px->id, scope->px_id, len)) + break; + + /* match '.' which means 'self' proxy */ + if (strcmp(scope->px_id, ".") == 0 && px == ctx->http_px) + break; + scope = scope->next; + } + + /* proxy name not found : don't dump anything */ + if (scope == NULL) + return 1; + } + + /* if the user has requested a limited output and the proxy + * name does not match, skip it. 
+ */ + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL) + return 1; + } + + if ((ctx->flags & STAT_F_BOUND) && + (ctx->iid != -1) && + (px->uuid != ctx->iid)) + return 1; + + ctx->px_st = STAT_PX_ST_TH; + __fallthrough; + + case STAT_PX_ST_TH: + if (ctx->flags & STAT_F_FMT_HTML) { + stats_dump_html_px_hdr(sc, px); + if (!stats_putchk(appctx, buf, htx)) + goto full; + } + + ctx->px_st = STAT_PX_ST_FE; + __fallthrough; + + case STAT_PX_ST_FE: + /* print the frontend */ + if (stats_dump_fe_line(sc, px)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + + current_field = 0; + ctx->obj2 = px->conf.listeners.n; + ctx->px_st = STAT_PX_ST_LI; + __fallthrough; + + case STAT_PX_ST_LI: + /* obj2 points to listeners list as initialized above */ + for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) { + if (stats_is_full(appctx, buf, htx)) + goto full; + + l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); + if (!l->counters) + continue; + + if (ctx->flags & STAT_F_BOUND) { + if (!(ctx->type & (1 << STATS_TYPE_SO))) + break; + + if (ctx->sid != -1 && l->luid != ctx->sid) + continue; + } + + /* print the frontend */ + if (stats_dump_li_line(sc, px, l)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + current_field = 0; + } + + ctx->obj2 = px->srv; /* may be NULL */ + ctx->px_st = STAT_PX_ST_SV; + __fallthrough; + + case STAT_PX_ST_SV: + /* check for dump resumption */ + if (px_st == STAT_PX_ST_SV) { + struct server *cur = ctx->obj2; + + /* re-entrant dump */ + BUG_ON(!cur); + if (cur->flags & SRV_F_DELETED) { + /* the server could have been marked as deleted + * between two dumping attempts, skip it. 
+ */ + cur = cur->next; + } + srv_drop(ctx->obj2); /* drop old srv taken on last dumping attempt */ + ctx->obj2 = cur; /* could be NULL */ + /* back to normal */ + } + + /* obj2 points to servers list as initialized above. + * + * A server may be removed during the stats dumping. + * Temporarily increment its refcount to prevent its + * anticipated cleaning. Call srv_drop() to release it. + */ + for (; ctx->obj2 != NULL; + ctx->obj2 = srv_drop(sv)) { + + sv = ctx->obj2; + srv_take(sv); + + if (stats_is_full(appctx, buf, htx)) + goto full; + + if (ctx->flags & STAT_F_BOUND) { + if (!(ctx->type & (1 << STATS_TYPE_SV))) { + srv_drop(sv); + break; + } + + if (ctx->sid != -1 && sv->puid != ctx->sid) + continue; + } + + /* do not report disabled servers */ + if (ctx->flags & STAT_F_HIDE_MAINT && + sv->cur_admin & SRV_ADMF_MAINT) { + continue; + } + + svs = sv; + while (svs->track) + svs = svs->track; + + /* do not report servers which are DOWN and not changing state */ + if ((ctx->flags & STAT_F_HIDE_DOWN) && + ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */ + (sv->cur_state == SRV_ST_STOPPED && /* server is down */ + (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) || + ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) || + ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) { + continue; + } + + if (stats_dump_sv_line(sc, px, sv)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + current_field = 0; + } /* for sv */ + + ctx->px_st = STAT_PX_ST_BE; + __fallthrough; + + case STAT_PX_ST_BE: + /* print the backend */ + if (stats_dump_be_line(sc, px)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + + current_field = 0; + ctx->px_st = STAT_PX_ST_END; + __fallthrough; + + case STAT_PX_ST_END: + if (ctx->flags & STAT_F_FMT_HTML) { + stats_dump_html_px_end(sc, px); + if 
(!stats_putchk(appctx, buf, htx)) + goto full; + } + + ctx->px_st = STAT_PX_ST_FIN; + __fallthrough; + + case STAT_PX_ST_FIN: + return 1; + + default: + /* unknown state, we should put an abort() here ! */ + return 1; + } + + full: + /* restore previous field */ + ctx->field = current_field; + return 0; +} + +/* Uses <appctx.ctx.stats.obj1> as a pointer to the current proxy and <obj2> as + * a pointer to the current server/listener. + */ +int stats_dump_proxies(struct stconn *sc, struct buffer *buf, struct htx *htx) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct proxy *px; + + /* dump proxies */ + while (ctx->obj1) { + if (stats_is_full(appctx, buf, htx)) + goto full; + + px = ctx->obj1; + /* Skip the global frontend proxies and non-networked ones. + * Also skip proxies that were disabled in the configuration + * This change allows retrieving stats from "old" proxies after a reload. + */ + if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 && + (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) { + if (stats_dump_proxy_to_buffer(sc, buf, htx, px) == 0) + return 0; + } + + ctx->obj1 = px->next; + ctx->px_st = STAT_PX_ST_INIT; + ctx->field = 0; + } + + return 1; + + full: + return 0; +} + +void proxy_stats_clear_counters(int clrall, struct list *stat_modules) +{ + struct proxy *px; + struct server *sv; + struct listener *li; + struct stats_module *mod; + + for (px = proxies_list; px; px = px->next) { + if (clrall) { + memset(&px->be_counters, 0, sizeof(px->be_counters)); + memset(&px->fe_counters, 0, sizeof(px->fe_counters)); + } + else { + px->be_counters.conn_max = 0; + px->be_counters.p.http.rps_max = 0; + px->be_counters.sps_max = 0; + px->be_counters.cps_max = 0; + px->be_counters.nbpend_max = 0; + px->be_counters.qtime_max = 0; + px->be_counters.ctime_max = 0; + px->be_counters.dtime_max = 0; + px->be_counters.ttime_max = 0; + + px->fe_counters.conn_max = 0; + px->fe_counters.p.http.rps_max = 0; 
+ px->fe_counters.sps_max = 0; + px->fe_counters.cps_max = 0; + } + + for (sv = px->srv; sv; sv = sv->next) + if (clrall) + memset(&sv->counters, 0, sizeof(sv->counters)); + else { + sv->counters.cur_sess_max = 0; + sv->counters.nbpend_max = 0; + sv->counters.sps_max = 0; + sv->counters.qtime_max = 0; + sv->counters.ctime_max = 0; + sv->counters.dtime_max = 0; + sv->counters.ttime_max = 0; + } + + list_for_each_entry(li, &px->conf.listeners, by_fe) + if (li->counters) { + if (clrall) + memset(li->counters, 0, sizeof(*li->counters)); + else + li->counters->conn_max = 0; + } + } + + list_for_each_entry(mod, stat_modules, list) { + if (!mod->clearable && !clrall) + continue; + + for (px = proxies_list; px; px = px->next) { + enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags); + + if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) { + EXTRA_COUNTERS_INIT(px->extra_counters_fe, + mod, + mod->counters, + mod->counters_size); + } + + if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) { + EXTRA_COUNTERS_INIT(px->extra_counters_be, + mod, + mod->counters, + mod->counters_size); + } + + if (mod_cap & STATS_PX_CAP_SRV) { + for (sv = px->srv; sv; sv = sv->next) { + EXTRA_COUNTERS_INIT(sv->extra_counters, + mod, + mod->counters, + mod->counters_size); + } + } + + if (mod_cap & STATS_PX_CAP_LI) { + list_for_each_entry(li, &px->conf.listeners, by_fe) { + EXTRA_COUNTERS_INIT(li->extra_counters, + mod, + mod->counters, + mod->counters_size); + } + } + } + } +} diff --git a/src/stats.c b/src/stats.c index ac47f00..5db9c26 100644 --- a/src/stats.c +++ b/src/stats.c @@ -12,7 +12,6 @@ */ #include <ctype.h> -#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -23,10 +22,10 @@ #include <sys/stat.h> #include <sys/types.h> +#include <import/ebsttree.h> #include <haproxy/api.h> #include <haproxy/activity.h> #include <haproxy/applet.h> -#include <haproxy/backend.h> #include <haproxy/base64.h> #include <haproxy/cfgparse.h> #include 
<haproxy/channel.h> @@ -36,9 +35,7 @@ #include <haproxy/compression.h> #include <haproxy/debug.h> #include <haproxy/errors.h> -#include <haproxy/fd.h> #include <haproxy/freq_ctr.h> -#include <haproxy/frontend.h> #include <haproxy/global.h> #include <haproxy/http.h> #include <haproxy/http_ana.h> @@ -57,6 +54,10 @@ #include <haproxy/server.h> #include <haproxy/session.h> #include <haproxy/stats.h> +#include <haproxy/stats-file.h> +#include <haproxy/stats-html.h> +#include <haproxy/stats-json.h> +#include <haproxy/stats-proxy.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/task.h> @@ -66,6 +67,13 @@ #include <haproxy/uri_auth-t.h> #include <haproxy/version.h> +/* Convert stat_col <col> to old-style <name> as name_desc. */ +static void stcol2ndesc(struct name_desc *name, const struct stat_col *col) +{ + name->name = col->name; + name->desc = col->desc; +} + /* status codes available for the stats admin page (strictly 4 chars length) */ const char *stat_status_codes[STAT_STATUS_SIZE] = { @@ -79,267 +87,202 @@ const char *stat_status_codes[STAT_STATUS_SIZE] = { [STAT_STATUS_IVAL] = "IVAL", }; -/* These are the field names for each INF_* field position. Please pay attention +/* These are the column names for each ST_I_INF_* field position. Please pay attention * to always use the exact same name except that the strings for new names must * be lower case or CamelCase while the enum entries must be upper case. 
*/ -const struct name_desc info_fields[INF_TOTAL_FIELDS] = { - [INF_NAME] = { .name = "Name", .desc = "Product name" }, - [INF_VERSION] = { .name = "Version", .desc = "Product version" }, - [INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" }, - [INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" }, - [INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" }, - [INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" }, - [INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" }, - [INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" }, - [INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" }, - [INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" }, - [INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" }, - [INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in byes (-m on command line)" }, - [INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" }, - [INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" }, - [INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" }, - [INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" }, - [INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" }, - [INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" }, - [INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit 
on the number of per-process sockets" }, - [INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" }, - [INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" }, - [INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" }, - [INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" }, - [INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" }, - [INF_MAX_SSL_CONNS] = { .name = "MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" }, - [INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" }, - [INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" }, - [INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" }, - [INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" }, - [INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" }, - [INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" }, - [INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" }, - [INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" }, - [INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" }, - [INF_SESS_RATE_LIMIT] = { .name = 
"SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" }, - [INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" }, - [INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" }, - [INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" }, - [INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" }, - [INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" }, - [INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" }, - [INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" }, - [INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" }, - [INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" }, - [INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" }, - [INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" }, - [INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" }, - [INF_COMPRESS_BPS_OUT] = { .name = 
"CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" }, - [INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" }, - [INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" }, - [INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" }, - [INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" }, - [INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" }, - [INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" }, - [INF_NODE] = { .name = "node", .desc = "Node name (global.node)" }, - [INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" }, - [INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" }, - [INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" }, - [INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" }, - [INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" }, - [INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" }, - [INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the 
current worker process" }, - [INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" }, - [INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" }, - [INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" }, - [INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" }, - [INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" }, - [INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" }, - [INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" }, - [INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" }, - [INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" }, - [INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" }, - [INF_WARNINGS] = { .name = "TotalWarnings", .desc = "Total warnings issued" }, - [INF_MAXCONN_REACHED] = { .name = "MaxconnReached", .desc = "Number of times an accepted connection resulted in Maxconn being reached" }, - [INF_BOOTTIME_MS] = { .name = "BootTime_ms", .desc = "How long ago it took to parse and process the config before being ready (milliseconds)" }, - [INF_NICED_TASKS] = { .name = "Niced_tasks", .desc = "Total number of active tasks+tasklets in the current worker process (Run_queue) that are niced" }, -}; - -const struct name_desc stat_fields[ST_F_TOTAL_FIELDS] = { - [ST_F_PXNAME] = { .name = 
"pxname", .desc = "Proxy name" }, - [ST_F_SVNAME] = { .name = "svname", .desc = "Server name" }, - [ST_F_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" }, - [ST_F_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" }, - [ST_F_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" }, - [ST_F_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" }, - [ST_F_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" }, - [ST_F_STOT] = { .name = "stot", .desc = "Total number of sessions since process started" }, - [ST_F_BIN] = { .name = "bin", .desc = "Total number of request bytes since process started" }, - [ST_F_BOUT] = { .name = "bout", .desc = "Total number of response bytes since process started" }, - [ST_F_DREQ] = { .name = "dreq", .desc = "Total number of denied requests since process started" }, - [ST_F_DRESP] = { .name = "dresp", .desc = "Total number of denied responses since process started" }, - [ST_F_EREQ] = { .name = "ereq", .desc = "Total number of invalid requests since process started" }, - [ST_F_ECON] = { .name = "econ", .desc = "Total number of failed connections to server since the worker process started" }, - [ST_F_ERESP] = { .name = "eresp", .desc = "Total number of invalid responses since the worker process started" }, - [ST_F_WRETR] = { .name = "wretr", .desc = "Total number of server connection retries since the worker process started" }, - [ST_F_WREDIS] = { .name = "wredis", .desc = "Total number of server redispatches due to connection failures since the worker process started" }, - [ST_F_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" }, - [ST_F_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for 
a backend" }, - [ST_F_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" }, - [ST_F_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" }, - [ST_F_CHKFAIL] = { .name = "chkfail", .desc = "Total number of failed individual health checks per server/backend, since the worker process started" }, - [ST_F_CHKDOWN] = { .name = "chkdown", .desc = "Total number of failed checks causing UP to DOWN server transitions, per server/backend, since the worker process started" }, - [ST_F_LASTCHG] = { .name = "lastchg", .desc = "How long ago the last server state changed, in seconds" }, - [ST_F_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" }, - [ST_F_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" }, - [ST_F_PID] = { .name = "pid", .desc = "Relative worker process number (1)" }, - [ST_F_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" }, - [ST_F_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" }, - [ST_F_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" }, - [ST_F_LBTOT] = { .name = "lbtot", .desc = "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)" }, - [ST_F_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" }, - [ST_F_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" }, - [ST_F_RATE] = { .name = "rate", .desc = "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)" }, - [ST_F_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend 
only, 'rate-limit sessions' setting)" }, - [ST_F_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" }, - [ST_F_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is currently in progress" }, - [ST_F_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" }, - [ST_F_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" }, - [ST_F_HRSP_1XX] = { .name = "hrsp_1xx", .desc = "Total number of HTTP responses with status 100-199 returned by this object since the worker process started" }, - [ST_F_HRSP_2XX] = { .name = "hrsp_2xx", .desc = "Total number of HTTP responses with status 200-299 returned by this object since the worker process started" }, - [ST_F_HRSP_3XX] = { .name = "hrsp_3xx", .desc = "Total number of HTTP responses with status 300-399 returned by this object since the worker process started" }, - [ST_F_HRSP_4XX] = { .name = "hrsp_4xx", .desc = "Total number of HTTP responses with status 400-499 returned by this object since the worker process started" }, - [ST_F_HRSP_5XX] = { .name = "hrsp_5xx", .desc = "Total number of HTTP responses with status 500-599 returned by this object since the worker process started" }, - [ST_F_HRSP_OTHER] = { .name = "hrsp_other", .desc = "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)" }, - [ST_F_HANAFAIL] = { .name = "hanafail", .desc = "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched" }, - [ST_F_REQ_RATE] = { .name = "req_rate", .desc = "Number of HTTP requests processed over the last second on this object" }, - [ST_F_REQ_RATE_MAX] = { .name = "req_rate_max", .desc = "Highest value of http requests observed since the 
worker process started" }, - [ST_F_REQ_TOT] = { .name = "req_tot", .desc = "Total number of HTTP requests processed by this object since the worker process started" }, - [ST_F_CLI_ABRT] = { .name = "cli_abrt", .desc = "Total number of requests or connections aborted by the client since the worker process started" }, - [ST_F_SRV_ABRT] = { .name = "srv_abrt", .desc = "Total number of requests or connections aborted by the server since the worker process started" }, - [ST_F_COMP_IN] = { .name = "comp_in", .desc = "Total number of bytes submitted to the HTTP compressor for this object since the worker process started" }, - [ST_F_COMP_OUT] = { .name = "comp_out", .desc = "Total number of bytes emitted by the HTTP compressor for this object since the worker process started" }, - [ST_F_COMP_BYP] = { .name = "comp_byp", .desc = "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)" }, - [ST_F_COMP_RSP] = { .name = "comp_rsp", .desc = "Total number of HTTP responses that were compressed for this object since the worker process started" }, - [ST_F_LASTSESS] = { .name = "lastsess", .desc = "How long ago some traffic was seen on this object on this worker process, in seconds" }, - [ST_F_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server (see also check_desc)" }, - [ST_F_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" }, - [ST_F_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 
last requests (backend/server)" }, - [ST_F_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" }, - [ST_F_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" }, - [ST_F_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" }, - [ST_F_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" }, - [ST_F_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" }, - [ST_F_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" }, - [ST_F_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" }, - [ST_F_CHECK_HEALTH] = { .name = "check_health", .desc = "Current server health check level (0..fall-1=DOWN, fall..rise-1=UP)" }, - [ST_F_AGENT_RISE] = { .name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" }, - [ST_F_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" }, - [ST_F_AGENT_HEALTH] = { .name = "agent_health", .desc = "Current server agent check level (0..fall-1=DOWN, fall..rise-1=UP)" }, - [ST_F_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if 
show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" }, - [ST_F_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_CONN_RATE] = { .name = "conn_rate", .desc = "Number of new connections accepted over the last second on the frontend for this worker process" }, - [ST_F_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" }, - [ST_F_CONN_TOT] = { .name = "conn_tot", .desc = "Total number of new connections accepted on this frontend since the worker process started" }, - [ST_F_INTERCEPTED] = { .name = "intercepted", .desc = "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started" }, - [ST_F_DCON] = { .name = "dcon", .desc = "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started" }, - [ST_F_DSES] = { .name = "dses", .desc = "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started" }, - [ST_F_WREW] = { .name = "wrew", .desc = "Total number of failed HTTP header rewrites since the worker process started" }, - [ST_F_CONNECT] = { .name = "connect", .desc = "Total number of outgoing connection attempts on this backend/server since the worker process started" }, - [ST_F_REUSE] = { .name = "reuse", .desc = "Total number of reused connection on this backend/server since the worker process started" }, - [ST_F_CACHE_LOOKUPS] = { .name = "cache_lookups", .desc = "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started" }, - [ST_F_CACHE_HITS] = { .name = "cache_hits", .desc = "Total number of HTTP requests not found in the cache on this 
frontend/backend since the worker process started" }, - [ST_F_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" }, - [ST_F_SRV_ILIM] = { .name = "src_ilim", .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" }, - [ST_F_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" }, - [ST_F_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" }, - [ST_F_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" }, - [ST_F_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" }, - [ST_F_EINT] = { .name = "eint", .desc = "Total number of internal errors since process started"}, - [ST_F_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections"}, - [ST_F_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections"}, - [ST_F_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"}, - [ST_F_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"}, - [ST_F_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" }, - [ST_F_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "[DEPRECATED] Backend's aggregated gauge of servers' status" }, - [ST_F_AGG_SRV_STATUS ] = { .name = "agg_server_status", .desc = "Backend's aggregated gauge of servers' status" }, - [ST_F_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" }, - [ST_F_SRID] = { .name = 
"srid", .desc = "Server id revision, to prevent server id reuse mixups" }, - [ST_F_SESS_OTHER] = { .name = "sess_other", .desc = "Total number of sessions other than HTTP since process started" }, - [ST_F_H1SESS] = { .name = "h1sess", .desc = "Total number of HTTP/1 sessions since process started" }, - [ST_F_H2SESS] = { .name = "h2sess", .desc = "Total number of HTTP/2 sessions since process started" }, - [ST_F_H3SESS] = { .name = "h3sess", .desc = "Total number of HTTP/3 sessions since process started" }, - [ST_F_REQ_OTHER] = { .name = "req_other", .desc = "Total number of sessions other than HTTP processed by this object since the worker process started" }, - [ST_F_H1REQ] = { .name = "h1req", .desc = "Total number of HTTP/1 sessions processed by this object since the worker process started" }, - [ST_F_H2REQ] = { .name = "h2req", .desc = "Total number of hTTP/2 sessions processed by this object since the worker process started" }, - [ST_F_H3REQ] = { .name = "h3req", .desc = "Total number of HTTP/3 sessions processed by this object since the worker process started" }, - [ST_F_PROTO] = { .name = "proto", .desc = "Protocol" }, +const struct name_desc stat_cols_info[ST_I_INF_MAX] = { + [ST_I_INF_NAME] = { .name = "Name", .desc = "Product name" }, + [ST_I_INF_VERSION] = { .name = "Version", .desc = "Product version" }, + [ST_I_INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" }, + [ST_I_INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" }, + [ST_I_INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" }, + [ST_I_INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" }, + [ST_I_INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" }, + [ST_I_INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" }, + 
[ST_I_INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" }, + [ST_I_INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" }, + [ST_I_INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" }, + [ST_I_INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in byes (-m on command line)" }, + [ST_I_INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" }, + [ST_I_INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" }, + [ST_I_INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" }, + [ST_I_INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" }, + [ST_I_INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" }, + [ST_I_INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" }, + [ST_I_INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit on the number of per-process sockets" }, + [ST_I_INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" }, + [ST_I_INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" }, + [ST_I_INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" }, + [ST_I_INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" }, + [ST_I_INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" }, + [ST_I_INF_MAX_SSL_CONNS] = { .name = 
"MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" }, + [ST_I_INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" }, + [ST_I_INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" }, + [ST_I_INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" }, + [ST_I_INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" }, + [ST_I_INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" }, + [ST_I_INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" }, + [ST_I_INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" }, + [ST_I_INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" }, + [ST_I_INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" }, + [ST_I_INF_SESS_RATE_LIMIT] = { .name = "SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" }, + [ST_I_INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" }, + [ST_I_INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" }, + [ST_I_INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" }, + [ST_I_INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" }, + 
[ST_I_INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" }, + [ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" }, + [ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" }, + [ST_I_INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" }, + [ST_I_INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" }, + [ST_I_INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" }, + [ST_I_INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" }, + [ST_I_INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" }, + [ST_I_INF_COMPRESS_BPS_OUT] = { .name = "CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" }, + [ST_I_INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" }, + [ST_I_INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" }, + [ST_I_INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP 
compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" }, + [ST_I_INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" }, + [ST_I_INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" }, + [ST_I_INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" }, + [ST_I_INF_NODE] = { .name = "node", .desc = "Node name (global.node)" }, + [ST_I_INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" }, + [ST_I_INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" }, + [ST_I_INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" }, + [ST_I_INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" }, + [ST_I_INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" }, + [ST_I_INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" }, + [ST_I_INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the current worker process" }, + [ST_I_INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" }, + [ST_I_INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" }, + [ST_I_INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" }, + 
[ST_I_INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" }, + [ST_I_INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" }, + [ST_I_INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" }, + [ST_I_INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" }, + [ST_I_INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" }, + [ST_I_INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" }, + [ST_I_INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" }, + [ST_I_INF_WARNINGS] = { .name = "TotalWarnings", .desc = "Total warnings issued" }, + [ST_I_INF_MAXCONN_REACHED] = { .name = "MaxconnReached", .desc = "Number of times an accepted connection resulted in Maxconn being reached" }, + [ST_I_INF_BOOTTIME_MS] = { .name = "BootTime_ms", .desc = "How long ago it took to parse and process the config before being ready (milliseconds)" }, + [ST_I_INF_NICED_TASKS] = { .name = "Niced_tasks", .desc = "Total number of active tasks+tasklets in the current worker process (Run_queue) that are niced" }, }; /* one line of info */ -THREAD_LOCAL struct field info[INF_TOTAL_FIELDS]; - -/* description of statistics (static and dynamic) */ -static struct name_desc *stat_f[STATS_DOMAIN_COUNT]; -static size_t stat_count[STATS_DOMAIN_COUNT]; +THREAD_LOCAL struct field stat_line_info[ST_I_INF_MAX]; /* one line for stats */ -THREAD_LOCAL struct field *stat_l[STATS_DOMAIN_COUNT]; +THREAD_LOCAL struct field *stat_lines[STATS_DOMAIN_COUNT]; + +/* Unified storage for statistics from all module + * TODO merge info stats into 
it as global statistic domain. + */ +struct name_desc *stat_cols[STATS_DOMAIN_COUNT]; +size_t stat_cols_len[STATS_DOMAIN_COUNT]; /* list of all registered stats module */ -static struct list stats_module_list[STATS_DOMAIN_COUNT] = { +struct list stats_module_list[STATS_DOMAIN_COUNT] = { LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_PROXY]), LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_RESOLVERS]), }; THREAD_LOCAL void *trash_counters; -static THREAD_LOCAL struct buffer trash_chunk = BUF_NULL; - -static inline uint8_t stats_get_domain(uint32_t domain) +/* Insert <cols> generic stat columns into <st_tree> indexed by their name. */ +int generate_stat_tree(struct eb_root *st_tree, const struct stat_col cols[]) { - return domain >> STATS_DOMAIN & STATS_DOMAIN_MASK; -} + const struct stat_col *col; + struct stcol_node *node; + size_t len; + int i; -static inline enum stats_domain_px_cap stats_px_get_cap(uint32_t domain) -{ - return domain >> STATS_PX_CAP & STATS_PX_CAP_MASK; + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &cols[i]; + + if (stcol_is_generic(col)) { + len = strlen(col->name); + node = malloc(sizeof(struct stcol_node) + len + 1); + if (!node) + goto err; + + node->col = col; + memcpy(node->name.key, col->name, len); + node->name.key[len] = '\0'; + + ebst_insert(st_tree, &node->name); + } + } + + return 0; + + err: + return 1; } -static void stats_dump_json_schema(struct buffer *out); -int stats_putchk(struct appctx *appctx, struct htx *htx) +int stats_putchk(struct appctx *appctx, struct buffer *buf, struct htx *htx) { - struct stconn *sc = appctx_sc(appctx); - struct channel *chn = sc_ic(sc); - struct buffer *chk = &trash_chunk; + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; if (htx) { - if (chk->data >= channel_htx_recv_max(chn, htx)) { - sc_need_room(sc, chk->data); + if (b_data(chk) > htx_free_data_space(htx)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + return 0; + } + if (!htx_add_data_atonce(htx, 
ist2(b_orig(chk), b_data(chk)))) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); return 0; } - if (!htx_add_data_atonce(htx, ist2(chk->area, chk->data))) { - sc_need_room(sc, 0); + chunk_reset(chk); + } + else if (buf) { + if (b_data(chk) > b_room(buf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); return 0; } - channel_add_input(chn, chk->data); - chk->data = 0; + b_putblk(buf, b_head(chk), b_data(chk)); + chunk_reset(chk); } - else { + else { if (applet_putchk(appctx, chk) == -1) return 0; } return 1; } -static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc) + +int stats_is_full(struct appctx *appctx, struct buffer *buf, struct htx *htx) +{ + if (htx) { + if (htx_almost_full(htx)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + goto full; + } + } + else if (buf) { + if (buffer_almost_full(buf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + goto full; + } + } + else { + if (buffer_almost_full(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + goto full; + } + } + return 0; +full: + return 1; +} + +const char *stats_scope_ptr(struct appctx *appctx) { struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *req = sc_oc(sc); - struct htx *htx = htxbuf(&req->buf); + struct htx *htx = htxbuf(&appctx->inbuf); struct htx_blk *blk; struct ist uri; @@ -359,38 +302,39 @@ static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc) * -> stats_dump_html_info() // emits the equivalent of "show info" at the top * -> stats_dump_proxy_to_buffer() // same as above, valid for CSV and HTML * -> stats_dump_html_px_hdr() - * -> stats_dump_fe_stats() - * -> stats_dump_li_stats() - * -> stats_dump_sv_stats() - * -> stats_dump_be_stats() + * -> stats_dump_fe_line() + * -> stats_dump_li_line() + * -> stats_dump_sv_line() + * -> stats_dump_be_line() * -> stats_dump_html_px_end() * -> stats_dump_html_end() // emits HTML trailer * -> stats_dump_json_end() // emits JSON trailer */ -/* 
Dumps the stats CSV header to the local trash buffer. The caller is - * responsible for clearing it if needed. +/* Dumps the stats CSV header to <out> buffer. The caller is responsible for + * clearing it if needed. + * * NOTE: Some tools happen to rely on the field position instead of its name, * so please only append new fields at the end, never in the middle. */ -static void stats_dump_csv_header(enum stats_domain domain) +static void stats_dump_csv_header(enum stats_domain domain, struct buffer *out) { - int field; + int i; - chunk_appendf(&trash_chunk, "# "); - if (stat_f[domain]) { - for (field = 0; field < stat_count[domain]; ++field) { - chunk_appendf(&trash_chunk, "%s,", stat_f[domain][field].name); + chunk_appendf(out, "# "); + if (stat_cols[domain]) { + for (i = 0; i < stat_cols_len[domain]; ++i) { + chunk_appendf(out, "%s,", stat_cols[domain][i].name); /* print special delimiter on proxy stats to mark end of static fields */ - if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) - chunk_appendf(&trash_chunk, "-,"); + if (domain == STATS_DOMAIN_PROXY && i + 1 == ST_I_PX_MAX) + chunk_appendf(out, "-,"); } } - chunk_appendf(&trash_chunk, "\n"); + chunk_appendf(out, "\n"); } /* Emits a stats field without any surrounding element and properly encoded to @@ -414,21 +358,6 @@ int stats_emit_raw_data_field(struct buffer *out, const struct field *f) } } -const char *field_to_html_str(const struct field *f) -{ - switch (field_format(f, 0)) { - case FF_S32: return U2H(f->u.s32); - case FF_S64: return U2H(f->u.s64); - case FF_U64: return U2H(f->u.u64); - case FF_U32: return U2H(f->u.u32); - case FF_FLT: return F2H(f->u.flt); - case FF_STR: return field_str(f, 0); - case FF_EMPTY: - default: - return ""; - } -} - /* Emits a stats field prefixed with its type. No CSV encoding is prepared, the * output is supposed to be used on its own line. Returns non-zero on success, 0 * if the buffer is full. 
@@ -451,61 +380,6 @@ int stats_emit_typed_data_field(struct buffer *out, const struct field *f) } } -/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per - * the recommendation for interoperable integers in section 6 of RFC 7159. - */ -#define JSON_INT_MAX ((1ULL << 53) - 1) -#define JSON_INT_MIN (0 - JSON_INT_MAX) - -/* Emits a stats field value and its type in JSON. - * Returns non-zero on success, 0 on error. - */ -int stats_emit_json_data_field(struct buffer *out, const struct field *f) -{ - int old_len; - char buf[20]; - const char *type, *value = buf, *quote = ""; - - switch (field_format(f, 0)) { - case FF_EMPTY: return 1; - case FF_S32: type = "\"s32\""; - snprintf(buf, sizeof(buf), "%d", f->u.s32); - break; - case FF_U32: type = "\"u32\""; - snprintf(buf, sizeof(buf), "%u", f->u.u32); - break; - case FF_S64: type = "\"s64\""; - if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX) - return 0; - type = "\"s64\""; - snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64); - break; - case FF_U64: if (f->u.u64 > JSON_INT_MAX) - return 0; - type = "\"u64\""; - snprintf(buf, sizeof(buf), "%llu", - (unsigned long long) f->u.u64); - break; - case FF_FLT: type = "\"flt\""; - flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt)); - break; - case FF_STR: type = "\"str\""; - value = field_str(f, 0); - quote = "\""; - break; - default: snprintf(buf, sizeof(buf), "%u", f->type); - type = buf; - value = "unknown"; - quote = "\""; - break; - } - - old_len = out->data; - chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}", - type, quote, value, quote); - return !(old_len == out->data); -} - /* Emits an encoding of the field type on 3 characters followed by a delimiter. * Returns non-zero on success, 0 if the buffer is full. */ @@ -550,73 +424,23 @@ int stats_emit_field_tags(struct buffer *out, const struct field *f, return chunk_appendf(out, "%c%c%c%c", origin, nature, scope, delim); } -/* Emits an encoding of the field type as JSON. 
- * Returns non-zero on success, 0 if the buffer is full. - */ -int stats_emit_json_field_tags(struct buffer *out, const struct field *f) -{ - const char *origin, *nature, *scope; - int old_len; - - switch (field_origin(f, 0)) { - case FO_METRIC: origin = "Metric"; break; - case FO_STATUS: origin = "Status"; break; - case FO_KEY: origin = "Key"; break; - case FO_CONFIG: origin = "Config"; break; - case FO_PRODUCT: origin = "Product"; break; - default: origin = "Unknown"; break; - } - - switch (field_nature(f, 0)) { - case FN_GAUGE: nature = "Gauge"; break; - case FN_LIMIT: nature = "Limit"; break; - case FN_MIN: nature = "Min"; break; - case FN_MAX: nature = "Max"; break; - case FN_RATE: nature = "Rate"; break; - case FN_COUNTER: nature = "Counter"; break; - case FN_DURATION: nature = "Duration"; break; - case FN_AGE: nature = "Age"; break; - case FN_TIME: nature = "Time"; break; - case FN_NAME: nature = "Name"; break; - case FN_OUTPUT: nature = "Output"; break; - case FN_AVG: nature = "Avg"; break; - default: nature = "Unknown"; break; - } - - switch (field_scope(f, 0)) { - case FS_PROCESS: scope = "Process"; break; - case FS_SERVICE: scope = "Service"; break; - case FS_SYSTEM: scope = "System"; break; - case FS_CLUSTER: scope = "Cluster"; break; - default: scope = "Unknown"; break; - } - - old_len = out->data; - chunk_appendf(out, "\"tags\":{" - "\"origin\":\"%s\"," - "\"nature\":\"%s\"," - "\"scope\":\"%s\"" - "}", origin, nature, scope); - return !(old_len == out->data); -} - -/* Dump all fields from <stats> into <out> using CSV format */ +/* Dump all fields from <line> into <out> using CSV format */ static int stats_dump_fields_csv(struct buffer *out, - const struct field *stats, size_t stats_count, + const struct field *line, size_t stats_count, struct show_stat_ctx *ctx) { int domain = ctx->domain; - int field; + int i; - for (field = 0; field < stats_count; ++field) { - if (!stats_emit_raw_data_field(out, &stats[field])) + for (i = 0; i < stats_count; ++i) 
{ + if (!stats_emit_raw_data_field(out, &line[i])) return 0; if (!chunk_strcat(out, ",")) return 0; /* print special delimiter on proxy stats to mark end of static fields */ - if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) { + if (domain == STATS_DOMAIN_PROXY && i + 1 == ST_I_PX_MAX) { if (!chunk_strcat(out, "-,")) return 0; } @@ -626,50 +450,50 @@ static int stats_dump_fields_csv(struct buffer *out, return 1; } -/* Dump all fields from <stats> into <out> using a typed "field:desc:type:value" format */ +/* Dump all fields from <line> into <out> using a typed "field:desc:type:value" format */ static int stats_dump_fields_typed(struct buffer *out, - const struct field *stats, + const struct field *line, size_t stats_count, struct show_stat_ctx * ctx) { int flags = ctx->flags; int domain = ctx->domain; - int field; + int i; - for (field = 0; field < stats_count; ++field) { - if (!stats[field].type) + for (i = 0; i < stats_count; ++i) { + if (!line[i].type) continue; switch (domain) { case STATS_DOMAIN_PROXY: chunk_appendf(out, "%c.%u.%u.%d.%s.%u:", - stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE ? 'F' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE ? 'B' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO ? 'L' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV ? 'S' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_FE ? 'F' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_BE ? 'B' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SO ? 'L' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SV ? 
'S' : '?', - stats[ST_F_IID].u.u32, stats[ST_F_SID].u.u32, - field, - stat_f[domain][field].name, - stats[ST_F_PID].u.u32); + line[ST_I_PX_IID].u.u32, line[ST_I_PX_SID].u.u32, + i, + stat_cols[domain][i].name, + line[ST_I_PX_PID].u.u32); break; case STATS_DOMAIN_RESOLVERS: - chunk_appendf(out, "N.%d.%s:", field, - stat_f[domain][field].name); + chunk_appendf(out, "N.%d.%s:", i, + stat_cols[domain][i].name); break; default: break; } - if (!stats_emit_field_tags(out, &stats[field], ':')) + if (!stats_emit_field_tags(out, &line[i], ':')) return 0; - if (!stats_emit_typed_data_field(out, &stats[field])) + if (!stats_emit_typed_data_field(out, &line[i])) return 0; - if (flags & STAT_SHOW_FDESC && - !chunk_appendf(out, ":\"%s\"", stat_f[domain][field].desc)) { + if (flags & STAT_F_SHOW_FDESC && + !chunk_appendf(out, ":\"%s\"", stat_cols[domain][i].desc)) { return 0; } @@ -679,3254 +503,42 @@ static int stats_dump_fields_typed(struct buffer *out, return 1; } -/* Dump all fields from <stats> into <out> using the "show info json" format */ -static int stats_dump_json_info_fields(struct buffer *out, - const struct field *info, - struct show_stat_ctx *ctx) -{ - int started = (ctx->field) ? 
1 : 0; - int ready_data = 0; - - if (!started && !chunk_strcat(out, "[")) - return 0; - - for (; ctx->field < INF_TOTAL_FIELDS; ctx->field++) { - int old_len; - int field = ctx->field; - - if (!field_format(info, field)) - continue; - - if (started && !chunk_strcat(out, ",")) - goto err; - started = 1; - - old_len = out->data; - chunk_appendf(out, - "{\"field\":{\"pos\":%d,\"name\":\"%s\"}," - "\"processNum\":%u,", - field, info_fields[field].name, - info[INF_PROCESS_NUM].u.u32); - if (old_len == out->data) - goto err; - - if (!stats_emit_json_field_tags(out, &info[field])) - goto err; - - if (!stats_emit_json_data_field(out, &info[field])) - goto err; - - if (!chunk_strcat(out, "}")) - goto err; - ready_data = out->data; - } - - if (!chunk_strcat(out, "]\n")) - goto err; - ctx->field = 0; /* we're done */ - return 1; - -err: - if (!ready_data) { - /* not enough buffer space for a single entry.. */ - chunk_reset(out); - chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n"); - return 0; /* hard error */ - } - /* push ready data and wait for a new buffer to complete the dump */ - out->data = ready_data; - return 1; -} - -static void stats_print_proxy_field_json(struct buffer *out, - const struct field *stat, - const char *name, - int pos, - uint32_t field_type, - uint32_t iid, - uint32_t sid, - uint32_t pid) -{ - const char *obj_type; - switch (field_type) { - case STATS_TYPE_FE: obj_type = "Frontend"; break; - case STATS_TYPE_BE: obj_type = "Backend"; break; - case STATS_TYPE_SO: obj_type = "Listener"; break; - case STATS_TYPE_SV: obj_type = "Server"; break; - default: obj_type = "Unknown"; break; - } - - chunk_appendf(out, - "{" - "\"objType\":\"%s\"," - "\"proxyId\":%u," - "\"id\":%u," - "\"field\":{\"pos\":%d,\"name\":\"%s\"}," - "\"processNum\":%u,", - obj_type, iid, sid, pos, name, pid); -} - -static void stats_print_rslv_field_json(struct buffer *out, - const struct field *stat, - const char *name, - int pos) -{ - chunk_appendf(out, - "{" - 
"\"field\":{\"pos\":%d,\"name\":\"%s\"},", - pos, name); -} - - -/* Dump all fields from <stats> into <out> using a typed "field:desc:type:value" format */ -static int stats_dump_fields_json(struct buffer *out, - const struct field *stats, size_t stats_count, - struct show_stat_ctx *ctx) -{ - int flags = ctx->flags; - int domain = ctx->domain; - int started = (ctx->field) ? 1 : 0; - int ready_data = 0; - - if (!started && (flags & STAT_STARTED) && !chunk_strcat(out, ",")) - return 0; - if (!started && !chunk_strcat(out, "[")) - return 0; - - for (; ctx->field < stats_count; ctx->field++) { - int old_len; - int field = ctx->field; - - if (!stats[field].type) - continue; - - if (started && !chunk_strcat(out, ",")) - goto err; - started = 1; - - old_len = out->data; - if (domain == STATS_DOMAIN_PROXY) { - stats_print_proxy_field_json(out, &stats[field], - stat_f[domain][field].name, - field, - stats[ST_F_TYPE].u.u32, - stats[ST_F_IID].u.u32, - stats[ST_F_SID].u.u32, - stats[ST_F_PID].u.u32); - } else if (domain == STATS_DOMAIN_RESOLVERS) { - stats_print_rslv_field_json(out, &stats[field], - stat_f[domain][field].name, - field); - } - - if (old_len == out->data) - goto err; - - if (!stats_emit_json_field_tags(out, &stats[field])) - goto err; - - if (!stats_emit_json_data_field(out, &stats[field])) - goto err; - - if (!chunk_strcat(out, "}")) - goto err; - ready_data = out->data; - } - - if (!chunk_strcat(out, "]")) - goto err; - - ctx->field = 0; /* we're done */ - return 1; - -err: - if (!ready_data) { - /* not enough buffer space for a single entry.. */ - chunk_reset(out); - if (ctx->flags & STAT_STARTED) - chunk_strcat(out, ","); - chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}"); - return 0; /* hard error */ - } - /* push ready data and wait for a new buffer to complete the dump */ - out->data = ready_data; - return 1; -} - -/* Dump all fields from <stats> into <out> using the HTML format. 
A column is - * reserved for the checkbox is STAT_ADMIN is set in <flags>. Some extra info - * are provided if STAT_SHLGNDS is present in <flags>. The statistics from - * extra modules are displayed at the end of the lines if STAT_SHMODULES is - * present in <flags>. - */ -static int stats_dump_fields_html(struct buffer *out, - const struct field *stats, - struct show_stat_ctx *ctx) -{ - struct buffer src; - struct stats_module *mod; - int flags = ctx->flags; - int i = 0, j = 0; - - if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE) { - chunk_appendf(out, - /* name, queue */ - "<tr class=\"frontend\">"); - - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - - chunk_appendf(out, - "<td class=ac>" - "<a name=\"%s/Frontend\"></a>" - "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>" - "<td colspan=3></td>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME)); - - chunk_appendf(out, - /* sessions rate : current */ - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Current connection rate:</th><td>%s/s</td></tr>" - "<tr><th>Current session rate:</th><td>%s/s</td></tr>" - "", - U2H(stats[ST_F_RATE].u.u32), - U2H(stats[ST_F_CONN_RATE].u.u32), - U2H(stats[ST_F_RATE].u.u32)); - - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, - "<tr><th>Current request rate:</th><td>%s/s</td></tr>", - U2H(stats[ST_F_REQ_RATE].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions rate : max */ - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Max connection rate:</th><td>%s/s</td></tr>" - "<tr><th>Max session rate:</th><td>%s/s</td></tr>" - "", - U2H(stats[ST_F_RATE_MAX].u.u32), - U2H(stats[ST_F_CONN_RATE_MAX].u.u32), - U2H(stats[ST_F_RATE_MAX].u.u32)); - - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, - "<tr><th>Max request rate:</th><td>%s/s</td></tr>", - U2H(stats[ST_F_REQ_RATE_MAX].u.u32)); - - 
chunk_appendf(out, - "</table></div></u></td>" - /* sessions rate : limit */ - "<td>%s</td>", - LIM2A(stats[ST_F_RATE_LIM].u.u32, "-")); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. connections:</th><td>%s</td></tr>" - "<tr><th>Cum. sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_CONN_TOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>- HTTP/1 sessions:</th><td>%s</td></tr>" - "<tr><th>- HTTP/2 sessions:</th><td>%s</td></tr>" - "<tr><th>- HTTP/3 sessions:</th><td>%s</td></tr>" - "<tr><th>- other sessions:</th><td>%s</td></tr>" - "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/1 requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/2 requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/3 requests:</th><td>%s</td></tr>" - "<tr><th>- other requests:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_H1SESS].u.u64), - U2H(stats[ST_F_H2SESS].u.u64), - U2H(stats[ST_F_H3SESS].u.u64), - U2H(stats[ST_F_SESS_OTHER].u.u64), - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_H1REQ].u.u64), - U2H(stats[ST_F_H2REQ].u.u64), - U2H(stats[ST_F_H3REQ].u.u64), - U2H(stats[ST_F_REQ_OTHER].u.u64)); - - chunk_appendf(out, - "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" - "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" - "<tr><th>- other responses:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_HRSP_1XX].u.u64), - U2H(stats[ST_F_HRSP_2XX].u.u64), - 
U2H(stats[ST_F_COMP_RSP].u.u64), - stats[ST_F_HRSP_2XX].u.u64 ? - (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), - U2H(stats[ST_F_HRSP_4XX].u.u64), - U2H(stats[ST_F_HRSP_5XX].u.u64), - U2H(stats[ST_F_HRSP_OTHER].u.u64)); - - chunk_appendf(out, - "<tr><th>Intercepted requests:</th><td>%s</td></tr>" - "<tr><th>Cache lookups:</th><td>%s</td></tr>" - "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal errors:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_INTERCEPTED].u.u64), - U2H(stats[ST_F_CACHE_LOOKUPS].u.u64), - U2H(stats[ST_F_CACHE_HITS].u.u64), - stats[ST_F_CACHE_LOOKUPS].u.u64 ? - (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, lastsess */ - "<td></td><td></td>" - /* bytes : in */ - "<td>%s</td>" - "", - U2H(stats[ST_F_BIN].u.u64)); - - chunk_appendf(out, - /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ - "<td>%s%s<div class=tips><table class=det>" - "<tr><th>Response bytes in:</th><td>%s</td></tr>" - "<tr><th>Compression in:</th><td>%s</td></tr>" - "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Compression bypass:</th><td>%s</td></tr>" - "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" - "</table></div>%s</td>", - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"", - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64), - U2H(stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0, - U2H(stats[ST_F_COMP_BYP].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_BOUT].u.u64 ? 
(int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0, - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":""); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors : request, connect, response */ - "<td>%s</td><td></td><td></td>" - /* warnings: retries, redispatches */ - "<td></td><td></td>" - /* server status : reflect frontend status */ - "<td class=ac>%s</td>" - /* rest of server: nothing */ - "<td class=ac colspan=8></td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_EREQ].u.u64), - field_str(stats, ST_F_STATUS)); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO) { - chunk_appendf(out, "<tr class=socket>"); - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - - chunk_appendf(out, - /* frontend name, listener name */ - "<td class=ac><a name=\"%s/+%s\"></a>%s" - "<a class=lfsb href=\"#%s/+%s\">%s</a>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), - (flags & STAT_SHLGNDS)?"<u>":"", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME)); - - if (flags & STAT_SHLGNDS) { - chunk_appendf(out, "<div class=tips>"); - - if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR))) - 
chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR) == '[') - chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR)) - chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR)); - - chunk_appendf(out, "proto=%s, ", field_str(stats, ST_F_PROTO)); - - /* id */ - chunk_appendf(out, "id: %d</div>", stats[ST_F_SID].u.u32); - } - - chunk_appendf(out, - /* queue */ - "%s</td><td colspan=3></td>" - /* sessions rate: current, max, limit */ - "<td colspan=3> </td>" - /* sessions: current, max, limit, total, lbtot, lastsess */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td>%s</td><td> </td><td> </td>" - /* bytes: in, out */ - "<td>%s</td><td>%s</td>" - "", - (flags & STAT_SHLGNDS)?"</u>":"", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64)); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors: request, connect, response */ - "<td>%s</td><td></td><td></td>" - /* warnings: retries, redispatches */ - "<td></td><td></td>" - /* server status: reflect listener status */ - "<td class=ac>%s</td>" - /* rest of server: nothing */ - "<td class=ac colspan=8></td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_EREQ].u.u64), - field_str(stats, ST_F_STATUS)); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - 
} - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV) { - const char *style; - - /* determine the style to use depending on the server's state, - * its health and weight. There isn't a 1-to-1 mapping between - * state and styles for the cases where the server is (still) - * up. The reason is that we don't want to report nolb and - * drain with the same color. - */ - - if (strcmp(field_str(stats, ST_F_STATUS), "DOWN") == 0 || - strcmp(field_str(stats, ST_F_STATUS), "DOWN (agent)") == 0) { - style = "down"; - } - else if (strncmp(field_str(stats, ST_F_STATUS), "DOWN ", strlen("DOWN ")) == 0) { - style = "going_up"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "DRAIN") == 0) { - style = "draining"; - } - else if (strncmp(field_str(stats, ST_F_STATUS), "NOLB ", strlen("NOLB ")) == 0) { - style = "going_down"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "NOLB") == 0) { - style = "nolb"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) { - style = "no_check"; - } - else if (!stats[ST_F_CHKFAIL].type || - stats[ST_F_CHECK_HEALTH].u.u32 == stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) { - /* no check or max health = UP */ - if (stats[ST_F_WEIGHT].u.u32) - style = "up"; - else - style = "draining"; - } - else { - style = "going_down"; - } - - if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) - chunk_appendf(out, "<tr class=\"maintain\">"); - else - chunk_appendf(out, - "<tr class=\"%s_%s\">", - (stats[ST_F_BCK].u.u32) ? 
"backup" : "active", style); - - - if (flags & STAT_ADMIN) - chunk_appendf(out, - "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>", - field_str(stats, ST_F_PXNAME), - field_str(stats, ST_F_SVNAME)); - - chunk_appendf(out, - "<td class=ac><a name=\"%s/%s\"></a>%s" - "<a class=lfsb href=\"#%s/%s\">%s</a>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), - (flags & STAT_SHLGNDS) ? "<u>" : "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME)); - - if (flags & STAT_SHLGNDS) { - chunk_appendf(out, "<div class=tips>"); - - if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR))) - chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR) == '[') - chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR)) - chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR)); - - /* id */ - chunk_appendf(out, "id: %d, rid: %d", stats[ST_F_SID].u.u32, stats[ST_F_SRID].u.u32); - - /* cookie */ - if (stats[ST_F_COOKIE].type) { - chunk_appendf(out, ", cookie: '"); - chunk_initstr(&src, field_str(stats, ST_F_COOKIE)); - chunk_htmlencode(out, &src); - chunk_appendf(out, "'"); - } - - chunk_appendf(out, "</div>"); - } - - chunk_appendf(out, - /* queue : current, max, limit */ - "%s</td><td>%s</td><td>%s</td><td>%s</td>" - /* sessions rate : current, max, limit */ - "<td>%s</td><td>%s</td><td></td>" - "", - (flags & STAT_SHLGNDS) ? 
"</u>" : "", - U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), LIM2A(stats[ST_F_QLIMIT].u.u32, "-"), - U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32)); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td><u>%s<div class=tips>" - "<table class=det>" - "<tr><th>Current active connections:</th><td>%s</td></tr>" - "<tr><th>Current used connections:</th><td>%s</td></tr>" - "<tr><th>Current idle connections:</th><td>%s</td></tr>" - "<tr><th>- unsafe:</th><td>%s</td></tr>" - "<tr><th>- safe:</th><td>%s</td></tr>" - "<tr><th>Estimated need of connections:</th><td>%s</td></tr>" - "<tr><th>Active connections limit:</th><td>%s</td></tr>" - "<tr><th>Idle connections limit:</th><td>%s</td></tr>" - "</table></div></u>" - "</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), - U2H(stats[ST_F_SCUR].u.u32), - U2H(stats[ST_F_USED_CONN_CUR].u.u32), - U2H(stats[ST_F_SRV_ICUR].u.u32), - U2H(stats[ST_F_IDLE_CONN_CUR].u.u32), - U2H(stats[ST_F_SAFE_CONN_CUR].u.u32), - U2H(stats[ST_F_NEED_CONN_EST].u.u32), - - LIM2A(stats[ST_F_SLIM].u.u32, "-"), - stats[ST_F_SRV_ILIM].type ? U2H(stats[ST_F_SRV_ILIM].u.u32) : "-", - U2H(stats[ST_F_SMAX].u.u32), LIM2A(stats[ST_F_SLIM].u.u32, "-"), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>New connections:</th><td>%s</td></tr>" - "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal error:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_CONNECT].u.u64), - U2H(stats[ST_F_REUSE].u.u64), - (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ? - (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0, - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_HRSP_1XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_1XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_2XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_2XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_3XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_4XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_4XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_5XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_5XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_OTHER].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_OTHER].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); - chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32)); - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, last */ - "<td>%s</td><td>%s</td>", - U2H(stats[ST_F_LBTOT].u.u64), - human_time(stats[ST_F_LASTSESS].u.s32, 1)); - - chunk_appendf(out, - /* bytes : in, out */ - "<td>%s</td><td>%s</td>" - /* denied: req, resp */ - "<td></td><td>%s</td>" - /* errors : request, connect */ - "<td></td><td>%s</td>" - /* errors : response */ - "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" - /* warnings: retries, redispatches */ - "<td>%lld</td><td>%lld</td>" - "", - U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_ECON].u.u64), - U2H(stats[ST_F_ERESP].u.u64), - (long long)stats[ST_F_CLI_ABRT].u.u64, - (long long)stats[ST_F_SRV_ABRT].u.u64, - (long long)stats[ST_F_WRETR].u.u64, - (long long)stats[ST_F_WREDIS].u.u64); - - /* status, last change */ - chunk_appendf(out, "<td class=ac>"); - - /* FIXME!!!! - * LASTCHG should contain the last change for *this* server and must be computed - * properly above, as was done below, ie: this server if maint, otherwise ref server - * if tracking. Note that ref is either local or remote depending on tracking. 
- */ - - - if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) { - chunk_appendf(out, "%s MAINT", human_time(stats[ST_F_LASTCHG].u.u32, 1)); - } - else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) { - chunk_strcat(out, "<i>no check</i>"); - } - else { - chunk_appendf(out, "%s %s", human_time(stats[ST_F_LASTCHG].u.u32, 1), field_str(stats, ST_F_STATUS)); - if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0) { - if (stats[ST_F_CHECK_HEALTH].u.u32) - chunk_strcat(out, " ↑"); - } - else if (stats[ST_F_CHECK_HEALTH].u.u32 < stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) - chunk_strcat(out, " ↓"); - } - if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0 && - stats[ST_F_AGENT_STATUS].type && !stats[ST_F_AGENT_HEALTH].u.u32) { - chunk_appendf(out, - "</td><td class=ac><u> %s", - field_str(stats, ST_F_AGENT_STATUS)); - - if (stats[ST_F_AGENT_CODE].type) - chunk_appendf(out, "/%d", stats[ST_F_AGENT_CODE].u.u32); - - if (stats[ST_F_AGENT_DURATION].type) - chunk_appendf(out, " in %lums", (long)stats[ST_F_AGENT_DURATION].u.u64); - - chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_AGENT_DESC)); - - if (*field_str(stats, ST_F_LAST_AGT)) { - chunk_appendf(out, ": "); - chunk_initstr(&src, field_str(stats, ST_F_LAST_AGT)); - chunk_htmlencode(out, &src); - } - chunk_appendf(out, "</div></u>"); - } - else if (stats[ST_F_CHECK_STATUS].type) { - chunk_appendf(out, - "</td><td class=ac><u> %s", - field_str(stats, ST_F_CHECK_STATUS)); - - if (stats[ST_F_CHECK_CODE].type) - chunk_appendf(out, "/%d", stats[ST_F_CHECK_CODE].u.u32); - - if (stats[ST_F_CHECK_DURATION].type) - chunk_appendf(out, " in %lums", (long)stats[ST_F_CHECK_DURATION].u.u64); - - chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_CHECK_DESC)); - - if (*field_str(stats, ST_F_LAST_CHK)) { - chunk_appendf(out, ": "); - chunk_initstr(&src, field_str(stats, ST_F_LAST_CHK)); - chunk_htmlencode(out, &src); - } - chunk_appendf(out, 
"</div></u>"); - } - else - chunk_appendf(out, "</td><td>"); - - chunk_appendf(out, - /* weight / uweight */ - "</td><td class=ac>%d/%d</td>" - /* act, bck */ - "<td class=ac>%s</td><td class=ac>%s</td>" - "", - stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32, - stats[ST_F_BCK].u.u32 ? "-" : "Y", - stats[ST_F_BCK].u.u32 ? "Y" : "-"); - - /* check failures: unique, fatal, down time */ - if (strcmp(field_str(stats, ST_F_STATUS), "MAINT (resolution)") == 0) { - chunk_appendf(out, "<td class=ac colspan=3>resolution</td>"); - } - else if (stats[ST_F_CHKFAIL].type) { - chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_F_CHKFAIL].u.u64); - - if (stats[ST_F_HANAFAIL].type) - chunk_appendf(out, "/%lld", (long long)stats[ST_F_HANAFAIL].u.u64); - - chunk_appendf(out, - "<div class=tips>Failed Health Checks%s</div></u></td>" - "<td>%lld</td><td>%s</td>" - "", - stats[ST_F_HANAFAIL].type ? "/Health Analyses" : "", - (long long)stats[ST_F_CHKDOWN].u.u64, human_time(stats[ST_F_DOWNTIME].u.u32, 1)); - } - else if (strcmp(field_str(stats, ST_F_STATUS), "MAINT") != 0 && field_format(stats, ST_F_TRACKED) == FF_STR) { - /* tracking a server (hence inherited maint would appear as "MAINT (via...)" */ - chunk_appendf(out, - "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>", - field_str(stats, ST_F_TRACKED), field_str(stats, ST_F_TRACKED)); - } - else - chunk_appendf(out, "<td colspan=3></td>"); - - /* throttle */ - if (stats[ST_F_THROTTLE].type) - chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_F_THROTTLE].u.u32); - else - chunk_appendf(out, "<td class=ac>-</td>"); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - 
mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>\n"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE) { - chunk_appendf(out, "<tr class=\"backend\">"); - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - chunk_appendf(out, - "<td class=ac>" - /* name */ - "%s<a name=\"%s/Backend\"></a>" - "<a class=lfsb href=\"#%s/Backend\">Backend</a>" - "", - (flags & STAT_SHLGNDS)?"<u>":"", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME)); - - if (flags & STAT_SHLGNDS) { - /* balancing */ - chunk_appendf(out, "<div class=tips>balancing: %s", - field_str(stats, ST_F_ALGO)); - - /* cookie */ - if (stats[ST_F_COOKIE].type) { - chunk_appendf(out, ", cookie: '"); - chunk_initstr(&src, field_str(stats, ST_F_COOKIE)); - chunk_htmlencode(out, &src); - chunk_appendf(out, "'"); - } - chunk_appendf(out, "</div>"); - } - - chunk_appendf(out, - "%s</td>" - /* queue : current, max */ - "<td>%s</td><td>%s</td><td></td>" - /* sessions rate : current, max, limit */ - "<td>%s</td><td>%s</td><td></td>" - "", - (flags & STAT_SHLGNDS)?"</u>":"", - U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), - U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32)); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. 
sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>New connections:</th><td>%s</td></tr>" - "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" - "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" - "<tr><th>- other responses:</th><td>%s</td></tr>" - "<tr><th>Cache lookups:</th><td>%s</td></tr>" - "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal errors:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_CONNECT].u.u64), - U2H(stats[ST_F_REUSE].u.u64), - (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ? - (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0, - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_HRSP_1XX].u.u64), - U2H(stats[ST_F_HRSP_2XX].u.u64), - U2H(stats[ST_F_COMP_RSP].u.u64), - stats[ST_F_HRSP_2XX].u.u64 ? - (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), - U2H(stats[ST_F_HRSP_4XX].u.u64), - U2H(stats[ST_F_HRSP_5XX].u.u64), - U2H(stats[ST_F_HRSP_OTHER].u.u64), - U2H(stats[ST_F_CACHE_LOOKUPS].u.u64), - U2H(stats[ST_F_CACHE_HITS].u.u64), - stats[ST_F_CACHE_LOOKUPS].u.u64 ? 
- (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>"); - chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32)); - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, last */ - "<td>%s</td><td>%s</td>" - /* bytes: in */ - "<td>%s</td>" - "", - U2H(stats[ST_F_LBTOT].u.u64), - human_time(stats[ST_F_LASTSESS].u.s32, 1), - U2H(stats[ST_F_BIN].u.u64)); - - chunk_appendf(out, - /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ - "<td>%s%s<div class=tips><table class=det>" - "<tr><th>Response bytes in:</th><td>%s</td></tr>" - "<tr><th>Compression in:</th><td>%s</td></tr>" - "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Compression bypass:</th><td>%s</td></tr>" - "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" - "</table></div>%s</td>", - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"", - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64), - U2H(stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_COMP_IN].u.u64 ? 
(int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0, - U2H(stats[ST_F_COMP_BYP].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0, - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":""); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors : request, connect */ - "<td></td><td>%s</td>" - /* errors : response */ - "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" - /* warnings: retries, redispatches */ - "<td>%lld</td><td>%lld</td>" - /* backend status: reflect backend status (up/down): we display UP - * if the backend has known working servers or if it has no server at - * all (eg: for stats). Then we display the total weight, number of - * active and backups. */ - "<td class=ac>%s %s</td><td class=ac> </td><td class=ac>%d/%d</td>" - "<td class=ac>%d</td><td class=ac>%d</td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_ECON].u.u64), - U2H(stats[ST_F_ERESP].u.u64), - (long long)stats[ST_F_CLI_ABRT].u.u64, - (long long)stats[ST_F_SRV_ABRT].u.u64, - (long long)stats[ST_F_WRETR].u.u64, (long long)stats[ST_F_WREDIS].u.u64, - human_time(stats[ST_F_LASTCHG].u.u32, 1), - strcmp(field_str(stats, ST_F_STATUS), "DOWN") ? field_str(stats, ST_F_STATUS) : "<font color=\"red\"><b>DOWN</b></font>", - stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32, - stats[ST_F_ACT].u.u32, stats[ST_F_BCK].u.u32); - - chunk_appendf(out, - /* rest of backend: nothing, down transitions, total downtime, throttle */ - "<td class=ac> </td><td>%d</td>" - "<td>%s</td>" - "<td></td>", - stats[ST_F_CHKDOWN].u.u32, - stats[ST_F_DOWNTIME].type ? 
human_time(stats[ST_F_DOWNTIME].u.u32, 1) : " "); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - - return 1; -} - -int stats_dump_one_line(const struct field *stats, size_t stats_count, +int stats_dump_one_line(const struct field *line, size_t stats_count, struct appctx *appctx) { struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; int ret; - if (ctx->flags & STAT_FMT_HTML) - ret = stats_dump_fields_html(&trash_chunk, stats, ctx); - else if (ctx->flags & STAT_FMT_TYPED) - ret = stats_dump_fields_typed(&trash_chunk, stats, stats_count, ctx); - else if (ctx->flags & STAT_FMT_JSON) - ret = stats_dump_fields_json(&trash_chunk, stats, stats_count, ctx); + if (ctx->flags & STAT_F_FMT_HTML) + ret = stats_dump_fields_html(chk, line, ctx); + else if (ctx->flags & STAT_F_FMT_TYPED) + ret = stats_dump_fields_typed(chk, line, stats_count, ctx); + else if (ctx->flags & STAT_F_FMT_JSON) + ret = stats_dump_fields_json(chk, line, stats_count, ctx); + else if (ctx->flags & STAT_F_FMT_FILE) + ret = stats_dump_fields_file(chk, line, stats_count, ctx); else - ret = stats_dump_fields_csv(&trash_chunk, stats, stats_count, ctx); + ret = stats_dump_fields_csv(chk, line, stats_count, ctx); return ret; } -/* Fill <stats> with the frontend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. 
The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for frontends, the - * function returns 0, otherwise, it returns 1. - */ -int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND"); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_SCUR: - metric = mkf_u32(0, px->feconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, px->fe_counters.conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_resp); - break; - case ST_F_EREQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_req); - break; - case ST_F_DCON: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_conn); - break; - case ST_F_DSES: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_sess); - break; - case ST_F_STATUS: { - const char *state; - - if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) - state = "STOP"; - else if (px->flags & PR_FL_PAUSED) - state = "PAUSED"; - else - state = "OPEN"; - metric = mkf_str(FO_STATUS, 
state); - break; - } - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, 0); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE); - break; - case ST_F_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_sess_per_sec)); - break; - case ST_F_RATE_LIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.sps_max); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.internal_errors); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[0]); - break; - case ST_F_INTERCEPTED: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.intercepted_req); - break; - case ST_F_CACHE_LOOKUPS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_lookups); - break; - case ST_F_CACHE_HITS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_hits); - break; - case ST_F_REQ_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_req_per_sec)); 
- break; - case ST_F_REQ_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max); - break; - case ST_F_REQ_TOT: { - int i; - uint64_t total_req; - size_t nb_reqs = - sizeof(px->fe_counters.p.http.cum_req) / sizeof(*px->fe_counters.p.http.cum_req); - - total_req = 0; - for (i = 0; i < nb_reqs; i++) - total_req += px->fe_counters.p.http.cum_req[i]; - metric = mkf_u64(FN_COUNTER, total_req); - break; - } - case ST_F_COMP_IN: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_in[COMP_DIR_RES]); - break; - case ST_F_COMP_OUT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_out[COMP_DIR_RES]); - break; - case ST_F_COMP_BYP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_byp[COMP_DIR_RES]); - break; - case ST_F_COMP_RSP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.comp_rsp); - break; - case ST_F_CONN_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_conn_per_sec)); - break; - case ST_F_CONN_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.cps_max); - break; - case ST_F_CONN_TOT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_conn); - break; - case ST_F_SESS_OTHER: { - int i; - uint64_t total_sess; - size_t nb_sess = - sizeof(px->fe_counters.cum_sess_ver) / sizeof(*px->fe_counters.cum_sess_ver); - - total_sess = px->fe_counters.cum_sess; - for (i = 0; i < nb_sess; i++) - total_sess -= px->fe_counters.cum_sess_ver[i]; - total_sess = (int64_t)total_sess < 0 ? 
0 : total_sess; - metric = mkf_u64(FN_COUNTER, total_sess); - break; - } - case ST_F_H1SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[0]); - break; - case ST_F_H2SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[1]); - break; - case ST_F_H3SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[2]); - break; - case ST_F_REQ_OTHER: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[0]); - break; - case ST_F_H1REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[1]); - break; - case ST_F_H2REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[2]); - break; - case ST_F_H3REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[3]); - break; - default: - /* not used for frontends. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a frontend's line to the local trash buffer for the current proxy <px> - * and uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. Returns non-zero if it emits - * anything, zero otherwise. 
- */ -static int stats_dump_fe_stats(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - if (!(px->cap & PR_CAP_FE)) - return 0; - - if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE))) - return 0; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Fill <stats> with the listener statistics. <stats> is preallocated array of - * length <len>. The length of the array must be at least ST_F_TOTAL_FIELDS. If - * this length is less then this value, the function returns 0, otherwise, it - * returns 1. If selected_field is != NULL, only fill this one. <flags> can - * take the value STAT_SHLGNDS. - */ -int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags, - struct field *stats, int len, enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? 
*selected_field : 0); - struct buffer *out = get_trash_chunk(); - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - if (!l->counters) - return 0; - - chunk_reset(out); - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_SCUR: - metric = mkf_u32(0, l->nbconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, l->counters->conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, l->bind_conf->maxconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, l->counters->cum_conn); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, l->counters->bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, l->counters->bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, l->counters->denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, l->counters->denied_resp); - break; - case ST_F_EREQ: - metric = mkf_u64(FN_COUNTER, l->counters->failed_req); - break; - case ST_F_DCON: - metric = mkf_u64(FN_COUNTER, l->counters->denied_conn); - break; - case ST_F_DSES: - metric = mkf_u64(FN_COUNTER, l->counters->denied_sess); - break; - case ST_F_STATUS: - metric = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, l->luid); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, l->counters->failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, 
l->counters->internal_errors); - break; - case ST_F_ADDR: - if (flags & STAT_SHLGNDS) { - char str[INET6_ADDRSTRLEN]; - int port; - - port = get_host_port(&l->rx.addr); - switch (addr_to_str(&l->rx.addr, str, sizeof(str))) { - case AF_INET: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "%s:%d", str, port); - break; - case AF_INET6: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "[%s]:%d", str, port); - break; - case AF_UNIX: - metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); - break; - case -1: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_strcat(out, strerror(errno)); - break; - default: /* address family not supported */ - break; - } - } - break; - case ST_F_PROTO: - metric = mkf_str(FO_STATUS, l->rx.proto->name); - break; - default: - /* not used for listen. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for listener <l> and proxy <px> to the local trash buffer and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. Returns non-zero if it emits - * anything, zero otherwise. 
- */ -static int stats_dump_li_stats(struct stconn *sc, struct proxy *px, struct listener *l) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_li_stats(px, l, ctx->flags, stats, - ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(l->extra_counters, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -enum srv_stats_state { - SRV_STATS_STATE_DOWN = 0, - SRV_STATS_STATE_DOWN_AGENT, - SRV_STATS_STATE_GOING_UP, - SRV_STATS_STATE_UP_GOING_DOWN, - SRV_STATS_STATE_UP, - SRV_STATS_STATE_NOLB_GOING_DOWN, - SRV_STATS_STATE_NOLB, - SRV_STATS_STATE_DRAIN_GOING_DOWN, - SRV_STATS_STATE_DRAIN, - SRV_STATS_STATE_DRAIN_AGENT, - SRV_STATS_STATE_NO_CHECK, - - SRV_STATS_STATE_COUNT, /* Must be last */ -}; - -static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = { - [SRV_STATS_STATE_DOWN] = "DOWN", - [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)", - [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d", - [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d", - [SRV_STATS_STATE_UP] = "UP", - [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d", - [SRV_STATS_STATE_NOLB] = "NOLB", - [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d", - [SRV_STATS_STATE_DRAIN] = "DRAIN", - [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)", - [SRV_STATS_STATE_NO_CHECK] = "no check" -}; - -/* Compute server state helper - */ -static void stats_fill_sv_stats_computestate(struct server *sv, struct server *ref, - enum srv_stats_state *state) 
-{ - if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) { - if ((ref->check.state & CHK_ST_ENABLED) && - (ref->check.health < ref->check.rise + ref->check.fall - 1)) { - *state = SRV_STATS_STATE_UP_GOING_DOWN; - } else { - *state = SRV_STATS_STATE_UP; - } - - if (sv->cur_admin & SRV_ADMF_DRAIN) { - if (ref->agent.state & CHK_ST_ENABLED) - *state = SRV_STATS_STATE_DRAIN_AGENT; - else if (*state == SRV_STATS_STATE_UP_GOING_DOWN) - *state = SRV_STATS_STATE_DRAIN_GOING_DOWN; - else - *state = SRV_STATS_STATE_DRAIN; - } - - if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) { - *state = SRV_STATS_STATE_NO_CHECK; - } - } - else if (sv->cur_state == SRV_ST_STOPPING) { - if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) || - (ref->check.health == ref->check.rise + ref->check.fall - 1)) { - *state = SRV_STATS_STATE_NOLB; - } else { - *state = SRV_STATS_STATE_NOLB_GOING_DOWN; - } - } - else { /* stopped */ - if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) { - *state = SRV_STATS_STATE_DOWN_AGENT; - } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) { - *state = SRV_STATS_STATE_DOWN; /* DOWN */ - } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) { - *state = SRV_STATS_STATE_GOING_UP; - } else { - *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */ - } - } -} - -/* Fill <stats> with the backend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for servers, the - * function returns 0, otherwise, it returns 1. <flags> can take the value - * STAT_SHLGNDS. 
- */ -int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags, - struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - struct server *via = sv->track ? sv->track : sv; - struct server *ref = via; - enum srv_stats_state state = 0; - char str[INET6_ADDRSTRLEN]; - struct buffer *out = get_trash_chunk(); - char *fld_status; - long long srv_samples_counter; - unsigned int srv_samples_window = TIME_STATS_SAMPLES; - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - chunk_reset(out); - - /* compute state for later use */ - if (selected_field == NULL || *selected_field == ST_F_STATUS || - *selected_field == ST_F_CHECK_RISE || *selected_field == ST_F_CHECK_FALL || - *selected_field == ST_F_CHECK_HEALTH || *selected_field == ST_F_HANAFAIL) { - /* we have "via" which is the tracked server as described in the configuration, - * and "ref" which is the checked server and the end of the chain. - */ - while (ref->track) - ref = ref->track; - stats_fill_sv_stats_computestate(sv, ref, &state); - } - - /* compue time values for later use */ - if (selected_field == NULL || *selected_field == ST_F_QTIME || - *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME || - *selected_field == ST_F_TTIME) { - srv_samples_counter = (px->mode == PR_MODE_HTTP) ? 
sv->counters.p.http.cum_req : sv->counters.cum_lbconn; - if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0) - srv_samples_window = srv_samples_counter; - } - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_QCUR: - metric = mkf_u32(0, sv->queue.length); - break; - case ST_F_QMAX: - metric = mkf_u32(FN_MAX, sv->counters.nbpend_max); - break; - case ST_F_SCUR: - metric = mkf_u32(0, sv->cur_sess); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, sv->counters.cur_sess_max); - break; - case ST_F_SLIM: - if (sv->maxconn) - metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn); - break; - case ST_F_SRV_ICUR: - metric = mkf_u32(0, sv->curr_idle_conns); - break; - case ST_F_SRV_ILIM: - if (sv->max_idle_conns != -1) - metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, sv->counters.cum_sess); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, sv->counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, sv->counters.bytes_out); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, sv->counters.denied_resp); - break; - case ST_F_ECON: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_conns); - break; - case ST_F_ERESP: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_resp); - break; - case ST_F_WRETR: - metric = mkf_u64(FN_COUNTER, sv->counters.retries); - break; - case ST_F_WREDIS: - metric = mkf_u64(FN_COUNTER, sv->counters.redispatches); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, sv->counters.internal_errors); 
- break; - case ST_F_CONNECT: - metric = mkf_u64(FN_COUNTER, sv->counters.connect); - break; - case ST_F_REUSE: - metric = mkf_u64(FN_COUNTER, sv->counters.reuse); - break; - case ST_F_IDLE_CONN_CUR: - metric = mkf_u32(0, sv->curr_idle_nb); - break; - case ST_F_SAFE_CONN_CUR: - metric = mkf_u32(0, sv->curr_safe_nb); - break; - case ST_F_USED_CONN_CUR: - metric = mkf_u32(0, sv->curr_used_conns); - break; - case ST_F_NEED_CONN_EST: - metric = mkf_u32(0, sv->est_need_conns); - break; - case ST_F_STATUS: - fld_status = chunk_newstr(out); - if (sv->cur_admin & SRV_ADMF_RMAINT) - chunk_appendf(out, "MAINT (resolution)"); - else if (sv->cur_admin & SRV_ADMF_IMAINT) - chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id); - else if (sv->cur_admin & SRV_ADMF_MAINT) - chunk_appendf(out, "MAINT"); - else - chunk_appendf(out, - srv_hlt_st[state], - (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), - (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise)); - - metric = mkf_str(FO_STATUS, fld_status); - break; - case ST_F_LASTCHG: - metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - sv->last_change); - break; - case ST_F_WEIGHT: - metric = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); - break; - case ST_F_UWEIGHT: - metric = mkf_u32(FN_AVG, sv->uweight); - break; - case ST_F_ACT: - metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1); - break; - case ST_F_BCK: - metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 
1 : 0); - break; - case ST_F_CHKFAIL: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks); - break; - case ST_F_CHKDOWN: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u64(FN_COUNTER, sv->counters.down_trans); - break; - case ST_F_DOWNTIME: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u32(FN_COUNTER, srv_downtime(sv)); - break; - case ST_F_QLIMIT: - if (sv->maxqueue) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, sv->puid); - break; - case ST_F_SRID: - metric = mkf_u32(FN_COUNTER, sv->rid); - break; - case ST_F_THROTTLE: - if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv)) - metric = mkf_u32(FN_AVG, server_throttle_rate(sv)); - break; - case ST_F_LBTOT: - metric = mkf_u64(FN_COUNTER, sv->counters.cum_lbconn); - break; - case ST_F_TRACKED: - if (sv->track) { - char *fld_track = chunk_newstr(out); - chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id); - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track); - } - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV); - break; - case ST_F_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&sv->sess_per_sec)); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(FN_MAX, sv->counters.sps_max); - break; - case ST_F_CHECK_STATUS: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { - const char *fld_chksts; - - fld_chksts = chunk_newstr(out); - chunk_strcat(out, "* "); // for check in progress - chunk_strcat(out, get_check_status_info(sv->check.status)); - if (!(sv->check.state & CHK_ST_INPROGRESS)) - fld_chksts += 2; // skip "* " - metric = mkf_str(FN_OUTPUT, fld_chksts); - } - break; - case ST_F_CHECK_CODE: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == 
CHK_ST_ENABLED && - sv->check.status >= HCHK_STATUS_L57DATA) - metric = mkf_u32(FN_OUTPUT, sv->check.code); - break; - case ST_F_CHECK_DURATION: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && - sv->check.status >= HCHK_STATUS_CHECKED) - metric = mkf_u64(FN_DURATION, MAX(sv->check.duration, 0)); - break; - case ST_F_CHECK_DESC: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status)); - break; - case ST_F_LAST_CHK: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, sv->check.desc); - break; - case ST_F_CHECK_RISE: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise); - break; - case ST_F_CHECK_FALL: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall); - break; - case ST_F_CHECK_HEALTH: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health); - break; - case ST_F_AGENT_STATUS: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { - const char *fld_chksts; - - fld_chksts = chunk_newstr(out); - chunk_strcat(out, "* "); // for check in progress - chunk_strcat(out, get_check_status_info(sv->agent.status)); - if (!(sv->agent.state & CHK_ST_INPROGRESS)) - fld_chksts += 2; // skip "* " - metric = mkf_str(FN_OUTPUT, fld_chksts); - } - break; - case ST_F_AGENT_CODE: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && - (sv->agent.status >= HCHK_STATUS_L57DATA)) - metric = mkf_u32(FN_OUTPUT, sv->agent.code); - break; - case ST_F_AGENT_DURATION: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u64(FN_DURATION, sv->agent.duration); - break; - case ST_F_AGENT_DESC: - if 
((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status)); - break; - case ST_F_LAST_AGT: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, sv->agent.desc); - break; - case ST_F_AGENT_RISE: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise); - break; - case ST_F_AGENT_FALL: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall); - break; - case ST_F_AGENT_HEALTH: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health); - break; - case ST_F_REQ_TOT: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.cum_req); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[0]); - break; - case ST_F_HANAFAIL: - if (ref->observe) - metric = mkf_u64(FN_COUNTER, sv->counters.failed_hana); - break; - case ST_F_CLI_ABRT: - metric = mkf_u64(FN_COUNTER, sv->counters.cli_aborts); - break; - case ST_F_SRV_ABRT: - metric = mkf_u64(FN_COUNTER, sv->counters.srv_aborts); - break; - case ST_F_LASTSESS: - metric = 
mkf_s32(FN_AGE, srv_lastsession(sv)); - break; - case ST_F_QTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window)); - break; - case ST_F_CTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window)); - break; - case ST_F_RTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window)); - break; - case ST_F_TTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window)); - break; - case ST_F_QT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.qtime_max); - break; - case ST_F_CT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.ctime_max); - break; - case ST_F_RT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.dtime_max); - break; - case ST_F_TT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.ttime_max); - break; - case ST_F_ADDR: - if (flags & STAT_SHLGNDS) { - switch (addr_to_str(&sv->addr, str, sizeof(str))) { - case AF_INET: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "%s:%d", str, sv->svc_port); - break; - case AF_INET6: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "[%s]:%d", str, sv->svc_port); - break; - case AF_UNIX: - metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); - break; - case -1: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_strcat(out, strerror(errno)); - break; - default: /* address family not supported */ - break; - } - } - break; - case ST_F_COOKIE: - if (flags & STAT_SHLGNDS && sv->cookie) - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie); - break; - default: - /* not used for servers. If a specific metric - * is requested, return an error. Otherwise continue. 
- */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for server <sv> and proxy <px> to the local trash vbuffer and - * uses the state from stream connector <sc>, and server state <state>. The - * caller is responsible for clearing the local trash buffer if needed. Returns - * non-zero if it emits anything, zero otherwise. - */ -static int stats_dump_sv_stats(struct stconn *sc, struct proxy *px, struct server *sv) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct stats_module *mod; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - size_t stats_count = ST_F_TOTAL_FIELDS; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_sv_stats(px, sv, ctx->flags, stats, - ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) - continue; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Helper to compute srv values for a given backend - */ -static void stats_fill_be_stats_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw) -{ - int nbup_tmp, nbsrv_tmp, totuw_tmp; - const struct server *srv; - - nbup_tmp = nbsrv_tmp = totuw_tmp = 0; - for (srv = px->srv; srv; srv = srv->next) { - if (srv->cur_state != SRV_ST_STOPPED) { - nbup_tmp++; - if (srv_currently_usable(srv) && - (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP))) - totuw_tmp += srv->uweight; - } - nbsrv_tmp++; - } - - HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); - if 
(!px->srv_act && px->lbprm.fbck) - totuw_tmp = px->lbprm.fbck->uweight; - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); - - /* use tmp variable then assign result to make gcc happy */ - *nbup = nbup_tmp; - *nbsrv = nbsrv_tmp; - *totuw = totuw_tmp; -} - -/* Fill <stats> with the backend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for backends, the - * function returns 0, otherwise, it returns 1. <flags> can take the value - * STAT_SHLGNDS. - */ -int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - long long be_samples_counter; - unsigned int be_samples_window = TIME_STATS_SAMPLES; - struct buffer *out = get_trash_chunk(); - int nbup, nbsrv, totuw; - char *fld; - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - nbup = nbsrv = totuw = 0; - /* some srv values compute for later if we either select all fields or - * need them for one of the mentioned ones */ - if (selected_field == NULL || *selected_field == ST_F_STATUS || - *selected_field == ST_F_UWEIGHT) - stats_fill_be_stats_computesrv(px, &nbup, &nbsrv, &totuw); - - /* same here but specific to time fields */ - if (selected_field == NULL || *selected_field == ST_F_QTIME || - *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME || - *selected_field == ST_F_TTIME) { - be_samples_counter = (px->mode == PR_MODE_HTTP) ? 
px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn; - if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0) - be_samples_window = be_samples_counter; - } - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND"); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_QCUR: - metric = mkf_u32(0, px->queue.length); - break; - case ST_F_QMAX: - metric = mkf_u32(FN_MAX, px->be_counters.nbpend_max); - break; - case ST_F_SCUR: - metric = mkf_u32(0, px->beconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, px->be_counters.conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cum_conn); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, px->be_counters.denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, px->be_counters.denied_resp); - break; - case ST_F_ECON: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_conns); - break; - case ST_F_ERESP: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_resp); - break; - case ST_F_WRETR: - metric = mkf_u64(FN_COUNTER, px->be_counters.retries); - break; - case ST_F_WREDIS: - metric = mkf_u64(FN_COUNTER, px->be_counters.redispatches); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, px->be_counters.internal_errors); - break; - case ST_F_CONNECT: - metric = mkf_u64(FN_COUNTER, px->be_counters.connect); - 
break; - case ST_F_REUSE: - metric = mkf_u64(FN_COUNTER, px->be_counters.reuse); - break; - case ST_F_STATUS: - fld = chunk_newstr(out); - chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? "UP" : "DOWN"); - if (flags & (STAT_HIDE_MAINT|STAT_HIDE_DOWN)) - chunk_appendf(out, " (%d/%d)", nbup, nbsrv); - metric = mkf_str(FO_STATUS, fld); - break; - case ST_F_AGG_SRV_CHECK_STATUS: // DEPRECATED - case ST_F_AGG_SRV_STATUS: - metric = mkf_u32(FN_GAUGE, 0); - break; - case ST_F_AGG_CHECK_STATUS: - metric = mkf_u32(FN_GAUGE, 0); - break; - case ST_F_WEIGHT: - metric = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); - break; - case ST_F_UWEIGHT: - metric = mkf_u32(FN_AVG, totuw); - break; - case ST_F_ACT: - metric = mkf_u32(0, px->srv_act); - break; - case ST_F_BCK: - metric = mkf_u32(0, px->srv_bck); - break; - case ST_F_CHKDOWN: - metric = mkf_u64(FN_COUNTER, px->down_trans); - break; - case ST_F_LASTCHG: - metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - px->last_change); - break; - case ST_F_DOWNTIME: - if (px->srv) - metric = mkf_u32(FN_COUNTER, be_downtime(px)); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, 0); - break; - case ST_F_LBTOT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cum_lbconn); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE); - break; - case ST_F_RATE: - metric = mkf_u32(0, read_freq_ctr(&px->be_sess_per_sec)); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(0, px->be_counters.sps_max); - break; - case ST_F_COOKIE: - if (flags & STAT_SHLGNDS && px->cookie_name) - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name); - break; - case ST_F_ALGO: - if (flags & STAT_SHLGNDS) - metric = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO)); - break; - case ST_F_REQ_TOT: - if 
(px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cum_req); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[0]); - break; - case ST_F_CACHE_LOOKUPS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_lookups); - break; - case ST_F_CACHE_HITS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_hits); - break; - case ST_F_CLI_ABRT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cli_aborts); - break; - case ST_F_SRV_ABRT: - metric = mkf_u64(FN_COUNTER, px->be_counters.srv_aborts); - break; - case ST_F_COMP_IN: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_in[COMP_DIR_RES]); - break; - case ST_F_COMP_OUT: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_out[COMP_DIR_RES]); - break; - case ST_F_COMP_BYP: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_byp[COMP_DIR_RES]); - break; - case ST_F_COMP_RSP: - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.comp_rsp); - break; - case ST_F_LASTSESS: - metric = mkf_s32(FN_AGE, be_lastsession(px)); - break; - case ST_F_QTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window)); - break; - case ST_F_CTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, 
be_samples_window)); - break; - case ST_F_RTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window)); - break; - case ST_F_TTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window)); - break; - case ST_F_QT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.qtime_max); - break; - case ST_F_CT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.ctime_max); - break; - case ST_F_RT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.dtime_max); - break; - case ST_F_TT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.ttime_max); - break; - default: - /* not used for backends. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for backend <px> to the local trash buffer for and uses the - * state from stream interface <si>. The caller is responsible for clearing the - * local trash buffer if needed. Returns non-zero if it emits anything, zero - * otherwise. 
- */ -static int stats_dump_be_stats(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - if (!(px->cap & PR_CAP_BE)) - return 0; - - if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE))) - return 0; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_be_stats(px, ctx->flags, stats, ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - struct extra_counters *counters; - - if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) - continue; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Dumps the HTML table header for proxy <px> to the local trash buffer for and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. 
- */ -static void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - struct stats_module *mod; - int stats_module_len = 0; - - if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_ADMIN)) { - /* A form to enable/disable this proxy servers */ - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - chunk_appendf(&trash_chunk, - "<form method=\"post\">"); - } - - /* print a new table */ - chunk_appendf(&trash_chunk, - "<table class=\"tbl\" width=\"100%%\">\n" - "<tr class=\"titre\">" - "<th class=\"pxname\" width=\"10%%\">"); - - chunk_appendf(&trash_chunk, - "<a name=\"%s\"></a>%s" - "<a class=px href=\"#%s\">%s</a>", - px->id, - (ctx->flags & STAT_SHLGNDS) ? "<u>":"", - px->id, px->id); - - if (ctx->flags & STAT_SHLGNDS) { - /* cap, mode, id */ - chunk_appendf(&trash_chunk, "<div class=tips>cap: %s, mode: %s, id: %d", - proxy_cap_str(px->cap), proxy_mode_str(px->mode), - px->uuid); - chunk_appendf(&trash_chunk, "</div>"); - } - - chunk_appendf(&trash_chunk, - "%s</th>" - "<th class=\"%s\" width=\"90%%\">%s</th>" - "</tr>\n" - "</table>\n" - "<table class=\"tbl\" width=\"100%%\">\n" - "<tr class=\"titre\">", - (ctx->flags & STAT_SHLGNDS) ? "</u>":"", - px->desc ? "desc" : "empty", px->desc ? 
px->desc : ""); - - if (ctx->flags & STAT_ADMIN) { - /* Column heading for Enable or Disable server */ - if ((px->cap & PR_CAP_BE) && px->srv) - chunk_appendf(&trash_chunk, - "<th rowspan=2 width=1><input type=\"checkbox\" " - "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) " - "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>", - px->id, - px->id); - else - chunk_appendf(&trash_chunk, "<th rowspan=2></th>"); - } - - chunk_appendf(&trash_chunk, - "<th rowspan=2></th>" - "<th colspan=3>Queue</th>" - "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>" - "<th colspan=2>Bytes</th><th colspan=2>Denied</th>" - "<th colspan=3>Errors</th><th colspan=2>Warnings</th>" - "<th colspan=9>Server</th>"); - - if (ctx->flags & STAT_SHMODULES) { - // calculate the count of module for colspan attribute - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - ++stats_module_len; - } - chunk_appendf(&trash_chunk, "<th colspan=%d>Extra modules</th>", - stats_module_len); - } - - chunk_appendf(&trash_chunk, - "</tr>\n" - "<tr class=\"titre\">" - "<th>Cur</th><th>Max</th><th>Limit</th>" - "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>" - "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>" - "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>" - "<th>Resp</th><th>Retr</th><th>Redis</th>" - "<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>" - "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>" - "<th>Thrtle</th>\n"); - - if (ctx->flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(&trash_chunk, "<th>%s</th>", mod->name); - } - } - - chunk_appendf(&trash_chunk, "</tr>"); -} - -/* Dumps the HTML table trailer for proxy <px> to the local trash buffer for and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. 
- */ -static void stats_dump_html_px_end(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - - chunk_appendf(&trash_chunk, "</table>"); - - if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_ADMIN)) { - /* close the form used to enable/disable this proxy servers */ - chunk_appendf(&trash_chunk, - "Choose the action to perform on the checked servers : " - "<select name=action>" - "<option value=\"\"></option>" - "<option value=\"ready\">Set state to READY</option>" - "<option value=\"drain\">Set state to DRAIN</option>" - "<option value=\"maint\">Set state to MAINT</option>" - "<option value=\"dhlth\">Health: disable checks</option>" - "<option value=\"ehlth\">Health: enable checks</option>" - "<option value=\"hrunn\">Health: force UP</option>" - "<option value=\"hnolb\">Health: force NOLB</option>" - "<option value=\"hdown\">Health: force DOWN</option>" - "<option value=\"dagent\">Agent: disable checks</option>" - "<option value=\"eagent\">Agent: enable checks</option>" - "<option value=\"arunn\">Agent: force UP</option>" - "<option value=\"adown\">Agent: force DOWN</option>" - "<option value=\"shutdown\">Kill Sessions</option>" - "</select>" - "<input type=\"hidden\" name=\"b\" value=\"#%d\">" - " <input type=\"submit\" value=\"Apply\">" - "</form>", - px->uuid); - } - - chunk_appendf(&trash_chunk, "<p>\n"); -} - -/* - * Dumps statistics for a proxy. The output is sent to the stream connector's - * input buffer. Returns 0 if it had to stop dumping data because of lack of - * buffer space, or non-zero if everything completed. This function is used - * both by the CLI and the HTTP entry points, and is able to dump the output - * in HTML or CSV formats. 
- */ -int stats_dump_proxy_to_buffer(struct stconn *sc, struct htx *htx, - struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *rep = sc_ic(sc); - struct server *sv, *svs; /* server and server-state, server-state=server or server->track */ - struct listener *l; - struct uri_auth *uri = NULL; - int current_field; - int px_st = ctx->px_st; - - if (ctx->http_px) - uri = ctx->http_px->uri_auth; - chunk_reset(&trash_chunk); -more: - current_field = ctx->field; - - switch (ctx->px_st) { - case STAT_PX_ST_INIT: - /* we are on a new proxy */ - if (uri && uri->scope) { - /* we have a limited scope, we have to check the proxy name */ - struct stat_scope *scope; - int len; - - len = strlen(px->id); - scope = uri->scope; - - while (scope) { - /* match exact proxy name */ - if (scope->px_len == len && !memcmp(px->id, scope->px_id, len)) - break; - - /* match '.' which means 'self' proxy */ - if (strcmp(scope->px_id, ".") == 0 && px == ctx->http_px) - break; - scope = scope->next; - } - - /* proxy name not found : don't dump anything */ - if (scope == NULL) - return 1; - } - - /* if the user has requested a limited output and the proxy - * name does not match, skip it. 
- */ - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL) - return 1; - } - - if ((ctx->flags & STAT_BOUND) && - (ctx->iid != -1) && - (px->uuid != ctx->iid)) - return 1; - - ctx->px_st = STAT_PX_ST_TH; - __fallthrough; - - case STAT_PX_ST_TH: - if (ctx->flags & STAT_FMT_HTML) { - stats_dump_html_px_hdr(sc, px); - if (!stats_putchk(appctx, htx)) - goto full; - } - - ctx->px_st = STAT_PX_ST_FE; - __fallthrough; - - case STAT_PX_ST_FE: - /* print the frontend */ - if (stats_dump_fe_stats(sc, px)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - - current_field = 0; - ctx->obj2 = px->conf.listeners.n; - ctx->px_st = STAT_PX_ST_LI; - __fallthrough; - - case STAT_PX_ST_LI: - /* obj2 points to listeners list as initialized above */ - for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) { - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); - if (!l->counters) - continue; - - if (ctx->flags & STAT_BOUND) { - if (!(ctx->type & (1 << STATS_TYPE_SO))) - break; - - if (ctx->sid != -1 && l->luid != ctx->sid) - continue; - } - - /* print the frontend */ - if (stats_dump_li_stats(sc, px, l)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - current_field = 0; - } - - ctx->obj2 = px->srv; /* may be NULL */ - ctx->px_st = STAT_PX_ST_SV; - __fallthrough; - - case STAT_PX_ST_SV: - /* check for dump resumption */ - if (px_st == STAT_PX_ST_SV) { - struct server *cur = ctx->obj2; - - /* re-entrant dump */ - BUG_ON(!cur); - if (cur->flags & SRV_F_DELETED) { - /* the server could have been marked as deleted - * between two dumping 
attempts, skip it. - */ - cur = cur->next; - } - srv_drop(ctx->obj2); /* drop old srv taken on last dumping attempt */ - ctx->obj2 = cur; /* could be NULL */ - /* back to normal */ - } - - /* obj2 points to servers list as initialized above. - * - * A server may be removed during the stats dumping. - * Temporarily increment its refcount to prevent its - * anticipated cleaning. Call srv_drop() to release it. - */ - for (; ctx->obj2 != NULL; - ctx->obj2 = srv_drop(sv)) { - - sv = ctx->obj2; - srv_take(sv); - - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - if (ctx->flags & STAT_BOUND) { - if (!(ctx->type & (1 << STATS_TYPE_SV))) { - srv_drop(sv); - break; - } - - if (ctx->sid != -1 && sv->puid != ctx->sid) - continue; - } - - /* do not report disabled servers */ - if (ctx->flags & STAT_HIDE_MAINT && - sv->cur_admin & SRV_ADMF_MAINT) { - continue; - } - - svs = sv; - while (svs->track) - svs = svs->track; - - /* do not report servers which are DOWN and not changing state */ - if ((ctx->flags & STAT_HIDE_DOWN) && - ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */ - (sv->cur_state == SRV_ST_STOPPED && /* server is down */ - (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) || - ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) || - ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) { - continue; - } - - if (stats_dump_sv_stats(sc, px, sv)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - current_field = 0; - } /* for sv */ - - ctx->px_st = STAT_PX_ST_BE; - __fallthrough; - - case STAT_PX_ST_BE: - /* print the backend */ - if (stats_dump_be_stats(sc, px)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - - current_field = 0; - 
ctx->px_st = STAT_PX_ST_END; - __fallthrough; - - case STAT_PX_ST_END: - if (ctx->flags & STAT_FMT_HTML) { - stats_dump_html_px_end(sc, px); - if (!stats_putchk(appctx, htx)) - goto full; - } - - ctx->px_st = STAT_PX_ST_FIN; - __fallthrough; - - case STAT_PX_ST_FIN: - return 1; - - default: - /* unknown state, we should put an abort() here ! */ - return 1; - } - - full: - /* restore previous field */ - ctx->field = current_field; - return 0; -} - -/* Dumps the HTTP stats head block to the local trash buffer and uses the - * per-uri parameters from the parent proxy. The caller is responsible for - * clearing the local trash buffer if needed. - */ -static void stats_dump_html_head(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - struct uri_auth *uri; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - /* WARNING! This must fit in the first buffer !!! */ - chunk_appendf(&trash_chunk, - "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n" - "\"http://www.w3.org/TR/html4/loose.dtd\">\n" - "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n" - "<link rel=\"icon\" href=\"data:,\">\n" - "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n" - "<style type=\"text/css\"><!--\n" - "body {" - " font-family: arial, helvetica, sans-serif;" - " font-size: 12px;" - " font-weight: normal;" - " color: black;" - " background: white;" - "}\n" - "th,td {" - " font-size: 10px;" - "}\n" - "h1 {" - " font-size: x-large;" - " margin-bottom: 0.5em;" - "}\n" - "h2 {" - " font-family: helvetica, arial;" - " font-size: x-large;" - " font-weight: bold;" - " font-style: italic;" - " color: #6020a0;" - " margin-top: 0em;" - " margin-bottom: 0em;" - "}\n" - "h3 {" - " font-family: helvetica, arial;" - " font-size: 16px;" - " font-weight: bold;" - " color: #b00040;" - " background: #e8e8d0;" - " margin-top: 0em;" - " margin-bottom: 0em;" - "}\n" - "li {" - " margin-top: 0.25em;" - " margin-right: 
2em;" - "}\n" - ".hr {margin-top: 0.25em;" - " border-color: black;" - " border-bottom-style: solid;" - "}\n" - ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n" - ".total {background: #20D0D0;color: #ffff80;}\n" - ".frontend {background: #e8e8d0;}\n" - ".socket {background: #d0d0d0;}\n" - ".backend {background: #e8e8d0;}\n" - ".active_down {background: #ff9090;}\n" - ".active_going_up {background: #ffd020;}\n" - ".active_going_down {background: #ffffa0;}\n" - ".active_up {background: #c0ffc0;}\n" - ".active_nolb {background: #20a0ff;}\n" - ".active_draining {background: #20a0FF;}\n" - ".active_no_check {background: #e0e0e0;}\n" - ".backup_down {background: #ff9090;}\n" - ".backup_going_up {background: #ff80ff;}\n" - ".backup_going_down {background: #c060ff;}\n" - ".backup_up {background: #b0d0ff;}\n" - ".backup_nolb {background: #90b0e0;}\n" - ".backup_draining {background: #cc9900;}\n" - ".backup_no_check {background: #e0e0e0;}\n" - ".maintain {background: #c07820;}\n" - ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */ - "\n" - "a.px:link {color: #ffff40; text-decoration: none;}" - "a.px:visited {color: #ffff40; text-decoration: none;}" - "a.px:hover {color: #ffffff; text-decoration: none;}" - "a.lfsb:link {color: #000000; text-decoration: none;}" - "a.lfsb:visited {color: #000000; text-decoration: none;}" - "a.lfsb:hover {color: #505050; text-decoration: none;}" - "\n" - "table.tbl { border-collapse: collapse; border-style: none;}\n" - "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n" - "table.tbl td.ac { text-align: center;}\n" - "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n" - "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 
2px 3px; white-space: nowrap;}\n" - "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n" - "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n" - "\n" - "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n" - "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n" - "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n" - "table.det { border-collapse: collapse; border-style: none; }\n" - "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n" - "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n" - "u {text-decoration:none; border-bottom: 1px dotted black;}\n" - "div.tips {\n" - " display:block;\n" - " visibility:hidden;\n" - " z-index:2147483647;\n" - " position:absolute;\n" - " padding:2px 4px 3px;\n" - " background:#f0f060; color:#000000;\n" - " border:1px solid #7040c0;\n" - " white-space:nowrap;\n" - " font-style:normal;font-size:11px;font-weight:normal;\n" - " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n" - " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n" - "}\n" - "u:hover div.tips {visibility:visible;}\n" - "@media (prefers-color-scheme: dark) {\n" - " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n" - " h1 { color: #a265e0!important; }\n" - " h2 { color: #a265e0; }\n" - " h3 { color: #ff5190; background-color: #3e3e1f; }\n" - " a { color: #3391ff; }\n" - " input { background-color: #2f3437; }\n" - " .hr { border-color: #8c8273; }\n" - " .titre { 
background-color: #1aa6a6; color: #e8e6e3; }\n" - " .frontend {background: #2f3437;}\n" - " .socket {background: #2a2d2f;}\n" - " .backend {background: #2f3437;}\n" - " .active_down {background: #760000;}\n" - " .active_going_up {background: #b99200;}\n" - " .active_going_down {background: #6c6c00;}\n" - " .active_up {background: #165900;}\n" - " .active_nolb {background: #006ab9;}\n" - " .active_draining {background: #006ab9;}\n" - " .active_no_check {background: #2a2d2f;}\n" - " .backup_down {background: #760000;}\n" - " .backup_going_up {background: #7f007f;}\n" - " .backup_going_down {background: #580092;}\n" - " .backup_up {background: #2e3234;}\n" - " .backup_nolb {background: #1e3c6a;}\n" - " .backup_draining {background: #a37a00;}\n" - " .backup_no_check {background: #2a2d2f;}\n" - " .maintain {background: #9a601a;}\n" - " a.px:link {color: #d8d83b; text-decoration: none;}\n" - " a.px:visited {color: #d8d83b; text-decoration: none;}\n" - " a.px:hover {color: #ffffff; text-decoration: none;}\n" - " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n" - " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n" - " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n" - " table.tbl th.empty { background-color: #181a1b; }\n" - " table.tbl th.desc { background: #181a1b; }\n" - " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n" - " table.tbl th { border-color: #808080; }\n" - " table.tbl td { border-color: #808080; }\n" - " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n" - " div.tips {\n" - " background:#8e8e0d;\n" - " color:#e8e6e3;\n" - " border-color: #4e2c86;\n" - " -moz-box-shadow: #60686c 2px 2px 3px;\n" - " -webkit-box-shadow: #60686c 2px 2px 3px;\n" - " box-shadow: #60686c 2px 2px 3px;\n" - " }\n" - "}\n" - "-->\n" - "</style></head>\n", - (ctx->flags & STAT_SHNODE) ? " on " : "", - (ctx->flags & STAT_SHNODE) ? (uri && uri->node ? 
uri->node : global.node) : "" - ); -} - -/* Dumps the HTML stats information block to the local trash buffer and uses - * the state from stream connector <sc> and per-uri parameter from the parent - * proxy. The caller is responsible for clearing the local trash buffer if - * needed. - */ -static void stats_dump_html_info(struct stconn *sc) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - unsigned int up = ns_to_sec(now_ns - start_time_ns); - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - const char *scope_ptr = stats_scope_ptr(appctx, sc); - struct uri_auth *uri; - unsigned long long bps; - int thr; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - for (bps = thr = 0; thr < global.nbthread; thr++) - bps += 32ULL * read_freq_ctr(&ha_thread_ctx[thr].out_32bps); - - /* Turn the bytes per second to bits per second and take care of the - * usual ethernet overhead in order to help figure how far we are from - * interface saturation since it's the only case which usually matters. - * For this we count the total size of an Ethernet frame on the wire - * including preamble and IFG (1538) for the largest TCP segment it - * transports (1448 with TCP timestamps). This is not valid for smaller - * packets (under-estimated), but it gives a reasonably accurate - * estimation of how far we are from uplink saturation. - */ - bps = bps * 8 * 1538 / 1448; - - /* WARNING! this has to fit the first packet too. - * We are around 3.5 kB, add adding entries will - * become tricky if we want to support 4kB buffers ! 
- */ - chunk_appendf(&trash_chunk, - "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">" - PRODUCT_NAME "%s</a></h1>\n" - "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n" - "<hr width=\"100%%\" class=\"hr\">\n" - "<h3>> General process information</h3>\n" - "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n" - "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n" - "<b>uptime = </b> %dd %dh%02dm%02ds; warnings = %u<br>\n" - "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n" - "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>reached = </b> %llu; <b>maxpipes = </b> %d<br>\n" - "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n" - "Running tasks: %d/%d (%d niced); idle = %d %%<br>\n" - "</td><td align=\"center\" nowrap>\n" - "<table class=\"lgd\"><tr>\n" - "<td class=\"active_up\"> </td><td class=\"noborder\">active UP </td>" - "<td class=\"backup_up\"> </td><td class=\"noborder\">backup UP </td>" - "</tr><tr>\n" - "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>" - "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>" - "</tr><tr>\n" - "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>" - "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>" - "</tr><tr>\n" - "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN </td>" - "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>" - "</tr><tr>\n" - "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) </td>" - "</tr><tr>\n" - "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance </td>" - "</tr></table>\n" - "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled." 
- "</td>" - "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" - "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">" - "", - (ctx->flags & STAT_HIDEVER) ? "" : (stats_version_string), - pid, (ctx->flags & STAT_SHNODE) ? " on " : "", - (ctx->flags & STAT_SHNODE) ? (uri->node ? uri->node : global.node) : "", - (ctx->flags & STAT_SHDESC) ? ": " : "", - (ctx->flags & STAT_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "", - pid, 1, 1, global.nbthread, - up / 86400, (up % 86400) / 3600, - (up % 3600) / 60, (up % 60), - HA_ATOMIC_LOAD(&tot_warnings), - global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited", - global.rlimit_memmax ? " MB" : "", - global.rlimit_nofile, - global.maxsock, global.maxconn, HA_ATOMIC_LOAD(&maxconn_reached), global.maxpipes, - actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec), - bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0), - bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k', - total_run_queues(), total_allocated_tasks(), total_niced_running_tasks(), clock_report_idle()); - - /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - memcpy(scope_txt, scope_ptr, ctx->scope_len); - scope_txt[ctx->scope_len] = '\0'; - - chunk_appendf(&trash_chunk, - "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n", - (ctx->scope_len > 0) ? 
scope_txt : "", - STAT_SCOPE_TXT_MAXLEN); - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - if (ctx->flags & STAT_HIDE_DOWN) - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n", - uri->uri_prefix, - "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - else - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n", - uri->uri_prefix, - ";up", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - - if (uri->refresh > 0) { - if (ctx->flags & STAT_NO_REFRESH) - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - "", - scope_txt); - else - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - ";norefresh", - scope_txt); - } - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n", - uri->uri_prefix, - (uri->refresh > 0) ? ";norefresh" : "", - scope_txt); - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n", - uri->uri_prefix, - (uri->refresh > 0) ? 
";norefresh" : "", - scope_txt, uri->uri_prefix); - - chunk_appendf(&trash_chunk, - "</ul></td>" - "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" - "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n" - "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n" - "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n" - "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n" - "</ul>" - "</td>" - "</tr></table>\n" - "" - ); - - if (ctx->st_code) { - switch (ctx->st_code) { - case STAT_STATUS_DONE: - chunk_appendf(&trash_chunk, - "<p><div class=active_up>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action processed successfully." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_NONE: - chunk_appendf(&trash_chunk, - "<p><div class=active_going_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Nothing has changed." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_PART: - chunk_appendf(&trash_chunk, - "<p><div class=active_going_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action partially processed.<br>" - "Some server names are probably unknown or ambiguous (duplicated names in the backend)." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_ERRP: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action not processed because of invalid parameters." 
- "<ul>" - "<li>The action is maybe unknown.</li>" - "<li>Invalid key parameter (empty or too long).</li>" - "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>" - "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>" - "</ul>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_EXCD: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Action not processed : the buffer couldn't store all the data.<br>" - "You should retry with less servers at a time.</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_DENY: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Action denied.</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_IVAL: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Invalid requests (unsupported method or chunked encoded request).</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - default: - chunk_appendf(&trash_chunk, - "<p><div class=active_no_check>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Unexpected result." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? 
";norefresh" : "", - scope_txt); - } - chunk_appendf(&trash_chunk, "<p>\n"); - } -} - -/* Dumps the HTML stats trailer block to the local trash buffer. The caller is - * responsible for clearing the local trash buffer if needed. - */ -static void stats_dump_html_end() -{ - chunk_appendf(&trash_chunk, "</body></html>\n"); -} - -/* Dumps the stats JSON header to the local trash buffer buffer which. The - * caller is responsible for clearing it if needed. - */ -static void stats_dump_json_header() -{ - chunk_strcat(&trash_chunk, "["); -} - - -/* Dumps the JSON stats trailer block to the local trash buffer. The caller is - * responsible for clearing the local trash buffer if needed. - */ -static void stats_dump_json_end() -{ - chunk_strcat(&trash_chunk, "]\n"); -} - -/* Uses <appctx.ctx.stats.obj1> as a pointer to the current proxy and <obj2> as - * a pointer to the current server/listener. - */ -static int stats_dump_proxies(struct stconn *sc, - struct htx *htx) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *rep = sc_ic(sc); - struct proxy *px; - - /* dump proxies */ - while (ctx->obj1) { - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - px = ctx->obj1; - /* Skip the global frontend proxies and non-networked ones. - * Also skip proxies that were disabled in the configuration - * This change allows retrieving stats from "old" proxies after a reload. 
- */ - if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 && - (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) { - if (stats_dump_proxy_to_buffer(sc, htx, px) == 0) - return 0; - } - - ctx->obj1 = px->next; - ctx->px_st = STAT_PX_ST_INIT; - ctx->field = 0; - } - - return 1; - - full: - return 0; -} - /* This function dumps statistics onto the stream connector's read buffer in * either CSV or HTML format. It returns 0 if it had to stop writing data and * an I/O is needed, 1 if the dump is finished and the stream must be closed, * or -1 in case of any error. This function is used by both the CLI and the * HTTP handlers. */ -static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) +int stats_dump_stat_to_buffer(struct stconn *sc, struct buffer *buf, struct htx *htx) { struct appctx *appctx = __sc_appctx(sc); struct show_stat_ctx *ctx = appctx->svcctx; enum stats_domain domain = ctx->domain; + struct buffer *chk = &ctx->chunk; - chunk_reset(&trash_chunk); + chunk_reset(chk); switch (ctx->state) { case STAT_STATE_INIT: @@ -3934,19 +546,21 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_HEAD: - if (ctx->flags & STAT_FMT_HTML) + if (ctx->flags & STAT_F_FMT_HTML) stats_dump_html_head(appctx); - else if (ctx->flags & STAT_JSON_SCHM) - stats_dump_json_schema(&trash_chunk); - else if (ctx->flags & STAT_FMT_JSON) - stats_dump_json_header(); - else if (!(ctx->flags & STAT_FMT_TYPED)) - stats_dump_csv_header(ctx->domain); - - if (!stats_putchk(appctx, htx)) + else if (ctx->flags & STAT_F_JSON_SCHM) + stats_dump_json_schema(chk); + else if (ctx->flags & STAT_F_FMT_JSON) + stats_dump_json_header(chk); + else if (ctx->flags & STAT_F_FMT_FILE) + stats_dump_file_header(ctx->type, chk); + else if (!(ctx->flags & STAT_F_FMT_TYPED)) + stats_dump_csv_header(ctx->domain, chk); + + if (!stats_putchk(appctx, buf, htx)) goto full; - if (ctx->flags & STAT_JSON_SCHM) { + if (ctx->flags & STAT_F_JSON_SCHM) 
{ ctx->state = STAT_STATE_FIN; return 1; } @@ -3954,9 +568,9 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_INFO: - if (ctx->flags & STAT_FMT_HTML) { + if (ctx->flags & STAT_F_FMT_HTML) { stats_dump_html_info(sc); - if (!stats_putchk(appctx, htx)) + if (!stats_putchk(appctx, buf, htx)) goto full; } @@ -3971,8 +585,8 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) case STAT_STATE_LIST: switch (domain) { case STATS_DOMAIN_RESOLVERS: - if (!stats_dump_resolvers(sc, stat_l[domain], - stat_count[domain], + if (!stats_dump_resolvers(sc, stat_lines[domain], + stat_cols_len[domain], &stats_module_list[domain])) { return 0; } @@ -3981,7 +595,7 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) case STATS_DOMAIN_PROXY: default: /* dump proxies */ - if (!stats_dump_proxies(sc, htx)) + if (!stats_dump_proxies(sc, buf, htx)) return 0; break; } @@ -3990,12 +604,12 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_END: - if (ctx->flags & (STAT_FMT_HTML|STAT_FMT_JSON)) { - if (ctx->flags & STAT_FMT_HTML) - stats_dump_html_end(); + if (ctx->flags & (STAT_F_FMT_HTML|STAT_F_FMT_JSON)) { + if (ctx->flags & STAT_F_FMT_HTML) + stats_dump_html_end(chk); else - stats_dump_json_end(); - if (!stats_putchk(appctx, htx)) + stats_dump_json_end(chk); + if (!stats_putchk(appctx, buf, htx)) goto full; } @@ -4016,589 +630,23 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) } -/* We reached the stats page through a POST request. The appctx is - * expected to have already been allocated by the caller. - * Parse the posted data and enable/disable servers if necessary. - * Returns 1 if request was parsed or zero if it needs more data. 
- */ -static int stats_process_http_post(struct stconn *sc) -{ - struct stream *s = __sc_strm(sc); - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - - struct proxy *px = NULL; - struct server *sv = NULL; - - char key[LINESIZE]; - int action = ST_ADM_ACTION_NONE; - int reprocess = 0; - - int total_servers = 0; - int altered_servers = 0; - - char *first_param, *cur_param, *next_param, *end_params; - char *st_cur_param = NULL; - char *st_next_param = NULL; - - struct buffer *temp = get_trash_chunk(); - - struct htx *htx = htxbuf(&s->req.buf); - struct htx_blk *blk; - - /* we need more data */ - if (s->txn->req.msg_state < HTTP_MSG_DONE) { - /* check if we can receive more */ - if (htx_free_data_space(htx) <= global.tune.maxrewrite) { - ctx->st_code = STAT_STATUS_EXCD; - goto out; - } - goto wait; - } - - /* The request was fully received. Copy data */ - blk = htx_get_head_blk(htx); - while (blk) { - enum htx_blk_type type = htx_get_blk_type(blk); - - if (type == HTX_BLK_TLR || type == HTX_BLK_EOT) - break; - if (type == HTX_BLK_DATA) { - struct ist v = htx_get_blk_value(htx, blk); - - if (!chunk_memcat(temp, v.ptr, v.len)) { - ctx->st_code = STAT_STATUS_EXCD; - goto out; - } - } - blk = htx_get_next_blk(htx, blk); - } - - first_param = temp->area; - end_params = temp->area + temp->data; - cur_param = next_param = end_params; - *end_params = '\0'; - - ctx->st_code = STAT_STATUS_NONE; - - /* - * Parse the parameters in reverse order to only store the last value. - * From the html form, the backend and the action are at the end. - */ - while (cur_param > first_param) { - char *value; - int poffset, plen; - - cur_param--; - - if ((*cur_param == '&') || (cur_param == first_param)) { - reprocess_servers: - /* Parse the key */ - poffset = (cur_param != first_param ? 1 : 0); - plen = next_param - cur_param + (cur_param == first_param ? 
1 : 0); - if ((plen > 0) && (plen <= sizeof(key))) { - strncpy(key, cur_param + poffset, plen); - key[plen - 1] = '\0'; - } else { - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - - /* Parse the value */ - value = key; - while (*value != '\0' && *value != '=') { - value++; - } - if (*value == '=') { - /* Ok, a value is found, we can mark the end of the key */ - *value++ = '\0'; - } - if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0) - break; - - /* Now we can check the key to see what to do */ - if (!px && (strcmp(key, "b") == 0)) { - if ((px = proxy_be_by_name(value)) == NULL) { - /* the backend name is unknown or ambiguous (duplicate names) */ - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - } - else if (!action && (strcmp(key, "action") == 0)) { - if (strcmp(value, "ready") == 0) { - action = ST_ADM_ACTION_READY; - } - else if (strcmp(value, "drain") == 0) { - action = ST_ADM_ACTION_DRAIN; - } - else if (strcmp(value, "maint") == 0) { - action = ST_ADM_ACTION_MAINT; - } - else if (strcmp(value, "shutdown") == 0) { - action = ST_ADM_ACTION_SHUTDOWN; - } - else if (strcmp(value, "dhlth") == 0) { - action = ST_ADM_ACTION_DHLTH; - } - else if (strcmp(value, "ehlth") == 0) { - action = ST_ADM_ACTION_EHLTH; - } - else if (strcmp(value, "hrunn") == 0) { - action = ST_ADM_ACTION_HRUNN; - } - else if (strcmp(value, "hnolb") == 0) { - action = ST_ADM_ACTION_HNOLB; - } - else if (strcmp(value, "hdown") == 0) { - action = ST_ADM_ACTION_HDOWN; - } - else if (strcmp(value, "dagent") == 0) { - action = ST_ADM_ACTION_DAGENT; - } - else if (strcmp(value, "eagent") == 0) { - action = ST_ADM_ACTION_EAGENT; - } - else if (strcmp(value, "arunn") == 0) { - action = ST_ADM_ACTION_ARUNN; - } - else if (strcmp(value, "adown") == 0) { - action = ST_ADM_ACTION_ADOWN; - } - /* else these are the old supported methods */ - else if (strcmp(value, "disable") == 0) { - action = ST_ADM_ACTION_DISABLE; - } - else if (strcmp(value, "enable") == 0) { - action = ST_ADM_ACTION_ENABLE; 
- } - else if (strcmp(value, "stop") == 0) { - action = ST_ADM_ACTION_STOP; - } - else if (strcmp(value, "start") == 0) { - action = ST_ADM_ACTION_START; - } - else { - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - } - else if (strcmp(key, "s") == 0) { - if (!(px && action)) { - /* - * Indicates that we'll need to reprocess the parameters - * as soon as backend and action are known - */ - if (!reprocess) { - st_cur_param = cur_param; - st_next_param = next_param; - } - reprocess = 1; - } - else if ((sv = findserver(px, value)) != NULL) { - HA_SPIN_LOCK(SERVER_LOCK, &sv->lock); - switch (action) { - case ST_ADM_ACTION_DISABLE: - if (!(sv->cur_admin & SRV_ADMF_FMAINT)) { - altered_servers++; - total_servers++; - srv_set_admin_flag(sv, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_STATS_DISABLE); - } - break; - case ST_ADM_ACTION_ENABLE: - if (sv->cur_admin & SRV_ADMF_FMAINT) { - altered_servers++; - total_servers++; - srv_clr_admin_flag(sv, SRV_ADMF_FMAINT); - } - break; - case ST_ADM_ACTION_STOP: - if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) { - srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_STATS_STOP); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_START: - if (sv->cur_admin & SRV_ADMF_FDRAIN) { - srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_DHLTH: - if (sv->check.state & CHK_ST_CONFIGURED) { - sv->check.state &= ~CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_EHLTH: - if (sv->check.state & CHK_ST_CONFIGURED) { - sv->check.state |= CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HRUNN: - if (!(sv->track)) { - sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HNOLB: - if (!(sv->track)) { - sv->check.health = sv->check.rise + sv->check.fall - 1; - 
srv_set_stopping(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HDOWN: - if (!(sv->track)) { - sv->check.health = 0; - srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_DAGENT: - if (sv->agent.state & CHK_ST_CONFIGURED) { - sv->agent.state &= ~CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_EAGENT: - if (sv->agent.state & CHK_ST_CONFIGURED) { - sv->agent.state |= CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_ARUNN: - if (sv->agent.state & CHK_ST_ENABLED) { - sv->agent.health = sv->agent.rise + sv->agent.fall - 1; - srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_ADOWN: - if (sv->agent.state & CHK_ST_ENABLED) { - sv->agent.health = 0; - srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_READY: - srv_adm_set_ready(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_DRAIN: - srv_adm_set_drain(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_MAINT: - srv_adm_set_maint(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_SHUTDOWN: - if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) { - srv_shutdown_streams(sv, SF_ERR_KILLED); - altered_servers++; - total_servers++; - } - break; - } - HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock); - } else { - /* the server name is unknown or ambiguous (duplicate names) */ - total_servers++; - } - } - if (reprocess && px && action) { - /* Now, we know the backend and the action chosen by the user. 
- * We can safely restart from the first server parameter - * to reprocess them - */ - cur_param = st_cur_param; - next_param = st_next_param; - reprocess = 0; - goto reprocess_servers; - } - - next_param = cur_param; - } - } - - if (total_servers == 0) { - ctx->st_code = STAT_STATUS_NONE; - } - else if (altered_servers == 0) { - ctx->st_code = STAT_STATUS_ERRP; - } - else if (altered_servers == total_servers) { - ctx->st_code = STAT_STATUS_DONE; - } - else { - ctx->st_code = STAT_STATUS_PART; - } - out: - return 1; - wait: - ctx->st_code = STAT_STATUS_NONE; - return 0; -} - - -static int stats_send_http_headers(struct stconn *sc, struct htx *htx) -{ - struct stream *s = __sc_strm(sc); - struct uri_auth *uri; - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct htx_sl *sl; - unsigned int flags; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK); - sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK")); - if (!sl) - goto full; - sl->info.res.status = 200; - - if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache"))) - goto full; - if (ctx->flags & STAT_FMT_HTML) { - if (!htx_add_header(htx, ist("Content-Type"), ist("text/html"))) - goto full; - } - else if (ctx->flags & (STAT_FMT_JSON|STAT_JSON_SCHM)) { - if (!htx_add_header(htx, ist("Content-Type"), ist("application/json"))) - goto full; - } - else { - if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain"))) - goto full; - } - - if (uri->refresh > 0 && !(ctx->flags & STAT_NO_REFRESH)) { - const char *refresh = U2A(uri->refresh); - if (!htx_add_header(htx, ist("Refresh"), ist(refresh))) - goto full; - } - - if (ctx->flags & STAT_CHUNKED) { - if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked"))) - goto full; - } - - if (!htx_add_endof(htx, HTX_BLK_EOH)) - goto full; - - channel_add_input(&s->res, htx->data); 
- return 1; - - full: - htx_reset(htx); - sc_need_room(sc, 0); - return 0; -} - - -static int stats_send_http_redirect(struct stconn *sc, struct htx *htx) -{ - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - struct stream *s = __sc_strm(sc); - struct uri_auth *uri; - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct htx_sl *sl; - unsigned int flags; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - /* We don't want to land on the posted stats page because a refresh will - * repost the data. We don't want this to happen on accident so we redirect - * the browse to the stats page with a GET. - */ - chunk_printf(&trash, "%s;st=%s%s%s%s", - uri->uri_prefix, - ((ctx->st_code > STAT_STATUS_INIT) && - (ctx->st_code < STAT_STATUS_SIZE) && - stat_status_codes[ctx->st_code]) ? - stat_status_codes[ctx->st_code] : - stat_status_codes[STAT_STATUS_UNKN], - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? 
";norefresh" : "", - scope_txt); - - flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_CHNK); - sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other")); - if (!sl) - goto full; - sl->info.res.status = 303; - - if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || - !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) || - !htx_add_header(htx, ist("Content-Length"), ist("0")) || - !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data))) - goto full; - - if (!htx_add_endof(htx, HTX_BLK_EOH)) - goto full; - - channel_add_input(&s->res, htx->data); - return 1; - -full: - htx_reset(htx); - sc_need_room(sc, 0); - return 0; -} - -/* This I/O handler runs as an applet embedded in a stream connector. It is - * used to send HTTP stats over a TCP socket. The mechanism is very simple. - * appctx->st0 contains the operation in progress (dump, done). The handler - * automatically unregisters itself once transfer is complete. - */ -static void http_stats_io_handler(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); - struct stream *s = __sc_strm(sc); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct htx *req_htx, *res_htx; - - /* only proxy stats are available via http */ - ctx->domain = STATS_DOMAIN_PROXY; - - res_htx = htx_from_buf(&res->buf); - - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { - appctx->st0 = STAT_HTTP_END; - goto out; - } - - /* Check if the input buffer is available. 
*/ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); - goto out; - } - - /* all states are processed in sequence */ - if (appctx->st0 == STAT_HTTP_HEAD) { - if (stats_send_http_headers(sc, res_htx)) { - if (s->txn->meth == HTTP_METH_HEAD) - appctx->st0 = STAT_HTTP_DONE; - else - appctx->st0 = STAT_HTTP_DUMP; - } - } - - if (appctx->st0 == STAT_HTTP_DUMP) { - trash_chunk = b_make(trash.area, res->buf.size, 0, 0); - /* adjust buffer size to take htx overhead into account, - * make sure to perform this call on an empty buffer - */ - trash_chunk.size = buf_room_for_htx_data(&trash_chunk); - if (stats_dump_stat_to_buffer(sc, res_htx)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_POST) { - if (stats_process_http_post(sc)) - appctx->st0 = STAT_HTTP_LAST; - else if (s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_LAST) { - if (stats_send_http_redirect(sc, res_htx)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_DONE) { - /* no more data are expected. If the response buffer is empty, - * be sure to add something (EOT block in this case) to have - * something to send. It is important to be sure the EOM flags - * will be handled by the endpoint. - */ - if (htx_is_empty(res_htx)) { - if (!htx_add_endof(res_htx, HTX_BLK_EOT)) { - sc_need_room(sc, sizeof(struct htx_blk) + 1); - goto out; - } - channel_add_input(res, 1); - } - res_htx->flags |= HTX_FL_EOM; - se_fl_set(appctx->sedesc, SE_FL_EOI); - appctx->st0 = STAT_HTTP_END; - } - - if (appctx->st0 == STAT_HTTP_END) { - se_fl_set(appctx->sedesc, SE_FL_EOS); - applet_will_consume(appctx); - } - - out: - /* we have left the request in the buffer for the case where we - * process a POST, and this automatically re-enables activity on - * read. It's better to indicate that we want to stop reading when - * we're sending, so that we know there's at most one direction - * deciding to wake the applet up. 
It saves it from looping when - * emitting large blocks into small TCP windows. - */ - htx_to_buf(res_htx, &res->buf); - if (appctx->st0 == STAT_HTTP_END) { - /* eat the whole request */ - if (co_data(req)) { - req_htx = htx_from_buf(&req->buf); - co_htx_skip(req, req_htx, co_data(req)); - htx_to_buf(req_htx, &req->buf); - } - } - else if (co_data(res)) - applet_wont_consume(appctx); -} - -/* Dump all fields from <info> into <out> using the "show info" format (name: value) */ +/* Dump all fields from <info_fields> into <out> using the "show info" format (name: value) */ static int stats_dump_info_fields(struct buffer *out, - const struct field *info, + const struct field *line, struct show_stat_ctx *ctx) { int flags = ctx->flags; - int field; + int i; - for (field = 0; field < INF_TOTAL_FIELDS; field++) { - if (!field_format(info, field)) + for (i = 0; i < ST_I_INF_MAX; i++) { + if (!field_format(line, i)) continue; - if (!chunk_appendf(out, "%s: ", info_fields[field].name)) + if (!chunk_appendf(out, "%s: ", stat_cols_info[i].name)) return 0; - if (!stats_emit_raw_data_field(out, &info[field])) + if (!stats_emit_raw_data_field(out, &line[i])) return 0; - if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc)) + if ((flags & STAT_F_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", stat_cols_info[i].desc)) return 0; if (!chunk_strcat(out, "\n")) return 0; @@ -4606,25 +654,27 @@ static int stats_dump_info_fields(struct buffer *out, return 1; } -/* Dump all fields from <info> into <out> using the "show info typed" format */ +/* Dump all fields from <line> into <out> using the "show info typed" format */ static int stats_dump_typed_info_fields(struct buffer *out, - const struct field *info, + const struct field *line, struct show_stat_ctx *ctx) { int flags = ctx->flags; - int field; + int i; - for (field = 0; field < INF_TOTAL_FIELDS; field++) { - if (!field_format(info, field)) + for (i = 0; i < ST_I_INF_MAX; i++) { + if 
(!field_format(line, i)) continue; - if (!chunk_appendf(out, "%d.%s.%u:", field, info_fields[field].name, info[INF_PROCESS_NUM].u.u32)) + if (!chunk_appendf(out, "%d.%s.%u:", i, stat_cols_info[i].name, + line[ST_I_INF_PROCESS_NUM].u.u32)) { return 0; - if (!stats_emit_field_tags(out, &info[field], ':')) + } + if (!stats_emit_field_tags(out, &line[i], ':')) return 0; - if (!stats_emit_typed_data_field(out, &info[field])) + if (!stats_emit_typed_data_field(out, &line[i])) return 0; - if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc)) + if ((flags & STAT_F_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", stat_cols_info[i].desc)) return 0; if (!chunk_strcat(out, "\n")) return 0; @@ -4633,12 +683,12 @@ static int stats_dump_typed_info_fields(struct buffer *out, } /* Fill <info> with HAProxy global info. <info> is preallocated array of length - * <len>. The length of the array must be INF_TOTAL_FIELDS. If this length is + * <len>. The length of the array must be ST_I_INF_MAX. If this length is * less then this value, the function returns 0, otherwise, it returns 1. Some - * fields' presence or precision may depend on some of the STAT_* flags present + * fields' presence or precision may depend on some of the STAT_F_* flags present * in <flags>. 
*/ -int stats_fill_info(struct field *info, int len, uint flags) +int stats_fill_info(struct field *line, int len, uint flags) { struct buffer *out = get_trash_chunk(); uint64_t glob_out_bytes, glob_spl_bytes, glob_out_b32; @@ -4671,102 +721,102 @@ int stats_fill_info(struct field *info, int len, uint flags) boot = tv_ms_remain(&start_date, &ready_date); - if (len < INF_TOTAL_FIELDS) + if (len < ST_I_INF_MAX) return 0; chunk_reset(out); - memset(info, 0, sizeof(*info) * len); + memset(line, 0, sizeof(*line) * len); - info[INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME); - info[INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); - info[INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); - info[INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date); + line[ST_I_INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME); + line[ST_I_INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); + line[ST_I_INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); + line[ST_I_INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date); - info[INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread); - info[INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1); - info[INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1); - info[INF_PID] = mkf_u32(FO_STATUS, pid); + line[ST_I_INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread); + line[ST_I_INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1); + line[ST_I_INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1); + line[ST_I_INF_PID] = mkf_u32(FO_STATUS, pid); - info[INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out)); + line[ST_I_INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out)); chunk_appendf(out, "%ud %uh%02um%02us", up_sec / 86400, (up_sec % 86400) / 3600, (up_sec % 3600) / 60, (up_sec % 60)); - info[INF_UPTIME_SEC] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_DURATION, up_sec + up_usec / 1000000.0) : mkf_u32(FN_DURATION, up_sec); - info[INF_START_TIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec); - info[INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax); - info[INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L); - info[INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L)); - info[INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated()); - info[INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L)); - info[INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used()); - info[INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures()); - info[INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile); - info[INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock); - info[INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn); - info[INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn); - info[INF_CURR_CONN] = mkf_u32(0, actconn); - info[INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn); - info[INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count); + line[ST_I_INF_UPTIME_SEC] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_DURATION, up_sec + up_usec / 1000000.0) : mkf_u32(FN_DURATION, up_sec); + line[ST_I_INF_START_TIME_SEC] = (flags & STAT_F_USE_FLOAT) ? 
mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec); + line[ST_I_INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax); + line[ST_I_INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L); + line[ST_I_INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L)); + line[ST_I_INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated()); + line[ST_I_INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L)); + line[ST_I_INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used()); + line[ST_I_INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures()); + line[ST_I_INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile); + line[ST_I_INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock); + line[ST_I_INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn); + line[ST_I_INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn); + line[ST_I_INF_CURR_CONN] = mkf_u32(0, actconn); + line[ST_I_INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn); + line[ST_I_INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count); #ifdef USE_OPENSSL - info[INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn); - info[INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns); - info[INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns); + line[ST_I_INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn); + line[ST_I_INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns); + line[ST_I_INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns); #endif - info[INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes); - info[INF_PIPES_USED] = mkf_u32(0, pipes_used); - info[INF_PIPES_FREE] = mkf_u32(0, pipes_free); - info[INF_CONN_RATE] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec)); - info[INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim); - info[INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max); - info[INF_SESS_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec)); - info[INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim); - info[INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max); + line[ST_I_INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes); + line[ST_I_INF_PIPES_USED] = mkf_u32(0, pipes_used); + line[ST_I_INF_PIPES_FREE] = mkf_u32(0, pipes_free); + line[ST_I_INF_CONN_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec)); + line[ST_I_INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim); + line[ST_I_INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max); + line[ST_I_INF_SESS_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec)); + line[ST_I_INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim); + line[ST_I_INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max); #ifdef USE_OPENSSL - info[INF_SSL_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate); - info[INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim); - info[INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max); - info[INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate); - info[INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max); - info[INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse); - info[INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec)); - info[INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max); - info[INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups); - info[INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses); + line[ST_I_INF_SSL_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate); + line[ST_I_INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim); + line[ST_I_INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max); + line[ST_I_INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate); + line[ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max); + line[ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse); + line[ST_I_INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec)); + line[ST_I_INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max); + line[ST_I_INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups); + line[ST_I_INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses); #endif - info[INF_COMPRESS_BPS_IN] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in)); - info[INF_COMPRESS_BPS_OUT] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out)); - info[INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim); + line[ST_I_INF_COMPRESS_BPS_IN] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in)); + line[ST_I_INF_COMPRESS_BPS_OUT] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out)); + line[ST_I_INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim); #ifdef USE_ZLIB - info[INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory); - info[INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem); + line[ST_I_INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory); + line[ST_I_INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem); #endif - info[INF_TASKS] = mkf_u32(0, total_allocated_tasks()); - info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues()); - info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle()); - info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node); + line[ST_I_INF_TASKS] = mkf_u32(0, total_allocated_tasks()); + line[ST_I_INF_RUN_QUEUE] = mkf_u32(0, total_run_queues()); + line[ST_I_INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle()); + line[ST_I_INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node); if (global.desc) - info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc); - info[INF_STOPPING] = mkf_u32(0, stopping); - info[INF_JOBS] = mkf_u32(0, jobs); - info[INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs); - info[INF_LISTENERS] = mkf_u32(0, listeners); - info[INF_ACTIVE_PEERS] = mkf_u32(0, active_peers); - info[INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers); - info[INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs); - info[INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & 
GTUNE_BUSY_POLLING)); - info[INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions); - info[INF_TOTAL_BYTES_OUT] = mkf_u64(0, glob_out_bytes); - info[INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, glob_spl_bytes); - info[INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, glob_out_b32); - info[INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued); - info[INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages); - - info[INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out)); + line[ST_I_INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc); + line[ST_I_INF_STOPPING] = mkf_u32(0, stopping); + line[ST_I_INF_JOBS] = mkf_u32(0, jobs); + line[ST_I_INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs); + line[ST_I_INF_LISTENERS] = mkf_u32(0, listeners); + line[ST_I_INF_ACTIVE_PEERS] = mkf_u32(0, active_peers); + line[ST_I_INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers); + line[ST_I_INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs); + line[ST_I_INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & GTUNE_BUSY_POLLING)); + line[ST_I_INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions); + line[ST_I_INF_TOTAL_BYTES_OUT] = mkf_u64(0, glob_out_bytes); + line[ST_I_INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, glob_spl_bytes); + line[ST_I_INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, glob_out_b32); + line[ST_I_INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued); + line[ST_I_INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages); + + line[ST_I_INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out)); chunk_appendf(out, "%#x", get_tainted()); - info[INF_WARNINGS] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&tot_warnings)); - info[INF_MAXCONN_REACHED] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&maxconn_reached)); - info[INF_BOOTTIME_MS] = mkf_u32(FN_DURATION, boot); - info[INF_NICED_TASKS] = mkf_u32(0, total_niced_running_tasks()); + line[ST_I_INF_WARNINGS] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&tot_warnings)); + line[ST_I_INF_MAXCONN_REACHED] = mkf_u32(FN_COUNTER, 
HA_ATOMIC_LOAD(&maxconn_reached)); + line[ST_I_INF_BOOTTIME_MS] = mkf_u32(FN_DURATION, boot); + line[ST_I_INF_NICED_TASKS] = mkf_u32(0, total_niced_running_tasks()); return 1; } @@ -4779,24 +829,25 @@ static int stats_dump_info_to_buffer(struct stconn *sc) { struct appctx *appctx = __sc_appctx(sc); struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; int ret; int current_field; - if (!stats_fill_info(info, INF_TOTAL_FIELDS, ctx->flags)) + if (!stats_fill_info(stat_line_info, ST_I_INF_MAX, ctx->flags)) return 0; - chunk_reset(&trash_chunk); + chunk_reset(chk); more: current_field = ctx->field; - if (ctx->flags & STAT_FMT_TYPED) - ret = stats_dump_typed_info_fields(&trash_chunk, info, ctx); - else if (ctx->flags & STAT_FMT_JSON) - ret = stats_dump_json_info_fields(&trash_chunk, info, ctx); + if (ctx->flags & STAT_F_FMT_TYPED) + ret = stats_dump_typed_info_fields(chk, stat_line_info, ctx); + else if (ctx->flags & STAT_F_FMT_JSON) + ret = stats_dump_json_info_fields(chk, stat_line_info, ctx); else - ret = stats_dump_info_fields(&trash_chunk, info, ctx); + ret = stats_dump_info_fields(chk, stat_line_info, ctx); - if (applet_putchk(appctx, &trash_chunk) == -1) { + if (applet_putchk(appctx, chk) == -1) { /* restore previous field */ ctx->field = current_field; return 0; @@ -4809,248 +860,8 @@ more: return 1; } -/* This function dumps the schema onto the stream connector's read buffer. - * It returns 0 as long as it does not complete, non-zero upon completion. - * No state is used. - * - * Integer values bounded to the range [-(2**53)+1, (2**53)-1] as - * per the recommendation for interoperable integers in section 6 of RFC 7159. 
- */ -static void stats_dump_json_schema(struct buffer *out) -{ - - int old_len = out->data; - - chunk_strcat(out, - "{" - "\"$schema\":\"http://json-schema.org/draft-04/schema#\"," - "\"oneOf\":[" - "{" - "\"title\":\"Info\"," - "\"type\":\"array\"," - "\"items\":{" - "\"title\":\"InfoItem\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"field\":{\"$ref\":\"#/definitions/field\"}," - "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," - "\"tags\":{\"$ref\":\"#/definitions/tags\"}," - "\"value\":{\"$ref\":\"#/definitions/typedValue\"}" - "}," - "\"required\":[\"field\",\"processNum\",\"tags\"," - "\"value\"]" - "}" - "}," - "{" - "\"title\":\"Stat\"," - "\"type\":\"array\"," - "\"items\":{" - "\"title\":\"InfoItem\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"objType\":{" - "\"enum\":[\"Frontend\",\"Backend\",\"Listener\"," - "\"Server\",\"Unknown\"]" - "}," - "\"proxyId\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"id\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"field\":{\"$ref\":\"#/definitions/field\"}," - "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," - "\"tags\":{\"$ref\":\"#/definitions/tags\"}," - "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}" - "}," - "\"required\":[\"objType\",\"proxyId\",\"id\"," - "\"field\",\"processNum\",\"tags\"," - "\"value\"]" - "}" - "}," - "{" - "\"title\":\"Error\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"errorStr\":{" - "\"type\":\"string\"" - "}" - "}," - "\"required\":[\"errorStr\"]" - "}" - "]," - "\"definitions\":{" - "\"field\":{" - "\"type\":\"object\"," - "\"pos\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"name\":{" - "\"type\":\"string\"" - "}," - "\"required\":[\"pos\",\"name\"]" - "}," - "\"processNum\":{" - "\"type\":\"integer\"," - "\"minimum\":1" - "}," - "\"tags\":{" - "\"type\":\"object\"," - "\"origin\":{" - "\"type\":\"string\"," - "\"enum\":[\"Metric\",\"Status\",\"Key\"," - "\"Config\",\"Product\",\"Unknown\"]" 
- "}," - "\"nature\":{" - "\"type\":\"string\"," - "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\"," - "\"Rate\",\"Counter\",\"Duration\"," - "\"Age\",\"Time\",\"Name\",\"Output\"," - "\"Avg\", \"Unknown\"]" - "}," - "\"scope\":{" - "\"type\":\"string\"," - "\"enum\":[\"Cluster\",\"Process\",\"Service\"," - "\"System\",\"Unknown\"]" - "}," - "\"required\":[\"origin\",\"nature\",\"scope\"]" - "}," - "\"typedValue\":{" - "\"type\":\"object\"," - "\"oneOf\":[" - "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}" - "]," - "\"definitions\":{" - "\"s32Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"s32\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":-2147483648," - "\"maximum\":2147483647" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"s64Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"s64\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":-9007199254740991," - "\"maximum\":9007199254740991" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"u32Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"u32\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":0," - "\"maximum\":4294967295" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"u64Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"u64\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":0," - "\"maximum\":9007199254740991" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"strValue\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"str\"]" - "}," - 
"\"value\":{\"type\":\"string\"}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"unknownValue\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"value\":{" - "\"type\":\"string\"," - "\"enum\":[\"unknown\"]" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}" - "}" - "}" - "}" - "}"); - - if (old_len == out->data) { - chunk_reset(out); - chunk_appendf(out, - "{\"errorStr\":\"output buffer too short\"}"); - } - chunk_appendf(out, "\n"); -} - -/* This function dumps the schema onto the stream connector's read buffer. - * It returns 0 as long as it does not complete, non-zero upon completion. - * No state is used. - */ -static int stats_dump_json_schema_to_buffer(struct appctx *appctx) -{ - - chunk_reset(&trash_chunk); - - stats_dump_json_schema(&trash_chunk); - - if (applet_putchk(appctx, &trash_chunk) == -1) - return 0; - - return 1; -} - -static void http_stats_release(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - - if (ctx->px_st == STAT_PX_ST_SV) - srv_drop(ctx->obj2); -} - static int cli_parse_clear_counters(char **args, char *payload, struct appctx *appctx, void *private) { - struct proxy *px; - struct server *sv; - struct listener *li; - struct stats_module *mod; int clrall = 0; if (strcmp(args[2], "all") == 0) @@ -5061,96 +872,13 @@ static int cli_parse_clear_counters(char **args, char *payload, struct appctx *a (clrall && !cli_has_level(appctx, ACCESS_LVL_ADMIN))) return 1; - for (px = proxies_list; px; px = px->next) { - if (clrall) { - memset(&px->be_counters, 0, sizeof(px->be_counters)); - memset(&px->fe_counters, 0, sizeof(px->fe_counters)); - } - else { - px->be_counters.conn_max = 0; - px->be_counters.p.http.rps_max = 0; - px->be_counters.sps_max = 0; - px->be_counters.cps_max = 0; - px->be_counters.nbpend_max = 0; - px->be_counters.qtime_max = 0; - px->be_counters.ctime_max = 0; - px->be_counters.dtime_max = 0; - px->be_counters.ttime_max = 0; - - 
px->fe_counters.conn_max = 0; - px->fe_counters.p.http.rps_max = 0; - px->fe_counters.sps_max = 0; - px->fe_counters.cps_max = 0; - } - - for (sv = px->srv; sv; sv = sv->next) - if (clrall) - memset(&sv->counters, 0, sizeof(sv->counters)); - else { - sv->counters.cur_sess_max = 0; - sv->counters.nbpend_max = 0; - sv->counters.sps_max = 0; - sv->counters.qtime_max = 0; - sv->counters.ctime_max = 0; - sv->counters.dtime_max = 0; - sv->counters.ttime_max = 0; - } - - list_for_each_entry(li, &px->conf.listeners, by_fe) - if (li->counters) { - if (clrall) - memset(li->counters, 0, sizeof(*li->counters)); - else - li->counters->conn_max = 0; - } - } - global.cps_max = 0; global.sps_max = 0; global.ssl_max = 0; global.ssl_fe_keys_max = 0; global.ssl_be_keys_max = 0; - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - if (!mod->clearable && !clrall) - continue; - - for (px = proxies_list; px; px = px->next) { - enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags); - - if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) { - EXTRA_COUNTERS_INIT(px->extra_counters_fe, - mod, - mod->counters, - mod->counters_size); - } - - if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) { - EXTRA_COUNTERS_INIT(px->extra_counters_be, - mod, - mod->counters, - mod->counters_size); - } - - if (mod_cap & STATS_PX_CAP_SRV) { - for (sv = px->srv; sv; sv = sv->next) { - EXTRA_COUNTERS_INIT(sv->extra_counters, - mod, - mod->counters, - mod->counters_size); - } - } - - if (mod_cap & STATS_PX_CAP_LI) { - list_for_each_entry(li, &px->conf.listeners, by_fe) { - EXTRA_COUNTERS_INIT(li->extra_counters, - mod, - mod->counters, - mod->counters_size); - } - } - } - } + proxy_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_PROXY]); resolv_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_RESOLVERS]); @@ -5171,13 +899,13 @@ static int cli_parse_show_info(char **args, char *payload, struct appctx *appctx while (*args[arg]) { if 
(strcmp(args[arg], "typed") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_TYPED; else if (strcmp(args[arg], "json") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_JSON; else if (strcmp(args[arg], "desc") == 0) - ctx->flags |= STAT_SHOW_FDESC; + ctx->flags |= STAT_F_SHOW_FDESC; else if (strcmp(args[arg], "float") == 0) - ctx->flags |= STAT_USE_FLOAT; + ctx->flags |= STAT_F_USE_FLOAT; arg++; } return 0; @@ -5192,10 +920,10 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx ctx->scope_str = 0; ctx->scope_len = 0; ctx->http_px = NULL; // not under http context - ctx->flags = STAT_SHNODE | STAT_SHDESC; + ctx->flags = STAT_F_SHNODE | STAT_F_SHDESC; if ((strm_li(appctx_strm(appctx))->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) - ctx->flags |= STAT_SHLGNDS; + ctx->flags |= STAT_F_SHLGNDS; /* proxy is the default domain */ ctx->domain = STATS_DOMAIN_PROXY; @@ -5225,7 +953,7 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx if (!ctx->iid) return cli_err(appctx, "No such proxy.\n"); - ctx->flags |= STAT_BOUND; + ctx->flags |= STAT_F_BOUND; ctx->type = atoi(args[arg+1]); ctx->sid = atoi(args[arg+2]); arg += 3; @@ -5233,15 +961,15 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx while (*args[arg]) { if (strcmp(args[arg], "typed") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_TYPED; else if (strcmp(args[arg], "json") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_JSON; else if (strcmp(args[arg], "desc") == 0) - ctx->flags |= STAT_SHOW_FDESC; + ctx->flags |= STAT_F_SHOW_FDESC; else if (strcmp(args[arg], "no-maint") == 0) - ctx->flags |= 
STAT_HIDE_MAINT; + ctx->flags |= STAT_F_HIDE_MAINT; else if (strcmp(args[arg], "up") == 0) - ctx->flags |= STAT_HIDE_DOWN; + ctx->flags |= STAT_F_HIDE_DOWN; arg++; } @@ -5250,7 +978,8 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx static int cli_io_handler_dump_info(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); return stats_dump_info_to_buffer(appctx_sc(appctx)); } @@ -5259,8 +988,9 @@ static int cli_io_handler_dump_info(struct appctx *appctx) */ static int cli_io_handler_dump_stat(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); - return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); + return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); } static void cli_io_handler_release_stat(struct appctx *appctx) @@ -5273,10 +1003,61 @@ static void cli_io_handler_release_stat(struct appctx *appctx) static int cli_io_handler_dump_json_schema(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); return stats_dump_json_schema_to_buffer(appctx); } +static int cli_parse_dump_stat_file(char **args, char *payload, + struct appctx *appctx, void *private) +{ + struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->chunk = b_make(trash.area, trash.size, 0, 0); + ctx->domain = STATS_DOMAIN_PROXY; + ctx->flags |= STAT_F_FMT_FILE; + + return 0; +} + +/* Returns 1 on completion else 0. */ +static int cli_io_handler_dump_stat_file(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + int ret; + + /* Frontend and backend sides are outputted separately on stats-file. 
+ * As such, use STAT_F_BOUND to restrict proxies looping over frontend + * side first before first stats_dump_stat_to_buffer(). A second + * iteration is conducted for backend side after. + */ + ctx->flags |= STAT_F_BOUND; + + if (!(ctx->type & (1 << STATS_TYPE_BE))) { + /* Restrict to frontend side. */ + ctx->type = (1 << STATS_TYPE_FE) | (1 << STATS_TYPE_SO); + ctx->iid = ctx->sid = -1; + + ret = stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); + if (!ret) + return 0; + + chunk_strcat(&ctx->chunk, "\n"); + if (!stats_putchk(appctx, NULL, NULL)) + return 0; + + /* Switch to backend side. */ + ctx->state = STAT_STATE_INIT; + ctx->type = (1 << STATS_TYPE_BE) | (1 << STATS_TYPE_SV); + } + + return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); +} + +static void cli_io_handler_release_dump_stat_file(struct appctx *appctx) +{ +} + int stats_allocate_proxy_counters_internal(struct extra_counters **counters, int type, int px_cap) { @@ -5354,33 +1135,35 @@ void stats_register_module(struct stats_module *m) const uint8_t domain = stats_get_domain(m->domain_flags); LIST_APPEND(&stats_module_list[domain], &m->list); - stat_count[domain] += m->stats_count; + stat_cols_len[domain] += m->stats_count; } + static int allocate_stats_px_postcheck(void) { struct stats_module *mod; - size_t i = ST_F_TOTAL_FIELDS; + size_t i = ST_I_PX_MAX, offset; int err_code = 0; struct proxy *px; - stat_count[STATS_DOMAIN_PROXY] += ST_F_TOTAL_FIELDS; + stat_cols_len[STATS_DOMAIN_PROXY] += ST_I_PX_MAX; - stat_f[STATS_DOMAIN_PROXY] = malloc(stat_count[STATS_DOMAIN_PROXY] * sizeof(struct name_desc)); - if (!stat_f[STATS_DOMAIN_PROXY]) { + stat_cols[STATS_DOMAIN_PROXY] = malloc(stat_cols_len[STATS_DOMAIN_PROXY] * sizeof(struct name_desc)); + if (!stat_cols[STATS_DOMAIN_PROXY]) { ha_alert("stats: cannot allocate all fields for proxy statistics\n"); err_code |= ERR_ALERT | ERR_FATAL; return err_code; } - memcpy(stat_f[STATS_DOMAIN_PROXY], stat_fields, - ST_F_TOTAL_FIELDS * 
sizeof(struct name_desc)); + for (i = 0; i < ST_I_PX_MAX; ++i) + stcol2ndesc(&stat_cols[STATS_DOMAIN_PROXY][i], &stat_cols_px[i]); list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - memcpy(stat_f[STATS_DOMAIN_PROXY] + i, - mod->stats, - mod->stats_count * sizeof(struct name_desc)); - i += mod->stats_count; + for (offset = i, i = 0; i < mod->stats_count; ++i) { + stcol2ndesc(&stat_cols[STATS_DOMAIN_PROXY][offset + i], + &mod->stats[i]); + } + i += offset; } for (px = proxies_list; px; px = px->next) { @@ -5391,7 +1174,7 @@ static int allocate_stats_px_postcheck(void) } } - /* wait per-thread alloc to perform corresponding stat_l allocation */ + /* wait per-thread alloc to perform corresponding stat_lines allocation */ return err_code; } @@ -5401,21 +1184,22 @@ REGISTER_CONFIG_POSTPARSER("allocate-stats-px", allocate_stats_px_postcheck); static int allocate_stats_rslv_postcheck(void) { struct stats_module *mod; - size_t i = 0; + size_t i = 0, offset; int err_code = 0; - stat_f[STATS_DOMAIN_RESOLVERS] = malloc(stat_count[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc)); - if (!stat_f[STATS_DOMAIN_RESOLVERS]) { + stat_cols[STATS_DOMAIN_RESOLVERS] = malloc(stat_cols_len[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc)); + if (!stat_cols[STATS_DOMAIN_RESOLVERS]) { ha_alert("stats: cannot allocate all fields for resolver statistics\n"); err_code |= ERR_ALERT | ERR_FATAL; return err_code; } list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_RESOLVERS], list) { - memcpy(stat_f[STATS_DOMAIN_RESOLVERS] + i, - mod->stats, - mod->stats_count * sizeof(struct name_desc)); - i += mod->stats_count; + for (offset = i, i = 0; i < mod->stats_count; ++i) { + stcol2ndesc(&stat_cols[STATS_DOMAIN_RESOLVERS][offset + i], + &mod->stats[i]); + } + i += offset; } if (!resolv_allocate_counters(&stats_module_list[STATS_DOMAIN_RESOLVERS])) { @@ -5424,7 +1208,7 @@ static int allocate_stats_rslv_postcheck(void) return err_code; } - /* wait per-thread alloc to 
perform corresponding stat_l allocation */ + /* wait per-thread alloc to perform corresponding stat_lines allocation */ return err_code; } @@ -5438,8 +1222,8 @@ static int allocate_stat_lines_per_thread(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - stat_l[domain] = malloc(stat_count[domain] * sizeof(struct field)); - if (!stat_l[domain]) + stat_lines[domain] = malloc(stat_cols_len[domain] * sizeof(struct field)); + if (!stat_lines[domain]) return 0; } return 1; @@ -5482,7 +1266,7 @@ static void deinit_stat_lines_per_thread(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - ha_free(&stat_l[domain]); + ha_free(&stat_lines[domain]); } } @@ -5496,8 +1280,8 @@ static void deinit_stats(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - if (stat_f[domain]) - free(stat_f[domain]); + if (stat_cols[domain]) + free(stat_cols[domain]); } } @@ -5517,18 +1301,12 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "info", NULL }, "show info [desc|json|typed|float]* : report information about the running process", cli_parse_show_info, cli_io_handler_dump_info, NULL }, { { "show", "stat", NULL }, "show stat [desc|json|no-maint|typed|up]*: report counters for each proxy and server", cli_parse_show_stat, cli_io_handler_dump_stat, cli_io_handler_release_stat }, { { "show", "schema", "json", NULL }, "show schema json : report schema used for stats", NULL, cli_io_handler_dump_json_schema, NULL }, + { { "dump", "stats-file", NULL }, "dump stats-file : dump stats for restore", cli_parse_dump_stat_file, cli_io_handler_dump_stat_file, cli_io_handler_release_dump_stat_file }, {{},} }}; INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); -struct applet http_stats_applet = { - .obj_type = OBJ_TYPE_APPLET, - .name = "<STATS>", /* used for logging */ - .fct = http_stats_io_handler, - .release = http_stats_release, -}; - /* * Local variables: * c-indent-level: 8 diff --git a/src/stconn.c 
b/src/stconn.c index df119a1..6077403 100644 --- a/src/stconn.c +++ b/src/stconn.c @@ -14,6 +14,7 @@ #include <haproxy/applet.h> #include <haproxy/connection.h> #include <haproxy/check.h> +#include <haproxy/filters.h> #include <haproxy/http_ana.h> #include <haproxy/pipe.h> #include <haproxy/pool.h> @@ -99,6 +100,9 @@ void sedesc_init(struct sedesc *sedesc) sedesc->xref.peer = NULL; se_fl_setall(sedesc, SE_FL_NONE); + sedesc->abort_info.info = 0; + sedesc->abort_info.code = 0; + sedesc->iobuf.pipe = NULL; sedesc->iobuf.buf = NULL; sedesc->iobuf.offset = sedesc->iobuf.data = 0; @@ -130,6 +134,54 @@ void sedesc_free(struct sedesc *sedesc) } } +/* Performs a shutdown on the endpoint. This function deals with connection and + * applet endpoints. It is responsible to set SE flags corresponding to the + * given shut modes and to call right shutdown functions of the endpoint. It is + * called from the .abort and .shut app_ops callback functions at the SC level. + */ +void se_shutdown(struct sedesc *sedesc, enum se_shut_mode mode) +{ + if (se_fl_test(sedesc, SE_FL_T_MUX)) { + const struct mux_ops *mux = (sedesc->conn ? sedesc->conn->mux : NULL); + unsigned int flags = 0; + + if ((mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) && !se_fl_test(sedesc, SE_FL_SHW)) + flags |= (mode & SE_SHW_NORMAL) ? SE_FL_SHWN : SE_FL_SHWS; + + + if ((mode & (SE_SHR_RESET|SE_SHR_DRAIN)) && !se_fl_test(sedesc, SE_FL_SHR)) + flags |= (mode & SE_SHR_DRAIN) ? 
SE_FL_SHRD : SE_FL_SHRR; + + if (flags) { + if (mux && mux->shut) { + struct se_abort_info *reason = NULL; + struct xref *peer = xref_get_peer_and_lock(&sedesc->xref); + + if (peer) { + struct sedesc *sdo = container_of(peer, struct sedesc, xref); + + reason = &sdo->abort_info; + xref_unlock(&sedesc->xref, peer); + } + + mux->shut(sedesc->sc, mode, reason); + + } + se_fl_set(sedesc, flags); + } + } + else if (se_fl_test(sedesc, SE_FL_T_APPLET)) { + if ((mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) && !se_fl_test(sedesc, SE_FL_SHW)) + se_fl_set(sedesc, SE_FL_SHWN); + + if ((mode & (SE_SHR_RESET|SE_SHR_DRAIN)) && !se_fl_test(sedesc, SE_FL_SHR)) + se_fl_set(sedesc, SE_FL_SHRR); + + if (se_fl_test(sedesc, SE_FL_SHR) && se_fl_test(sedesc, SE_FL_SHW)) + appctx_shut(sedesc->se); + } +} + /* Tries to allocate a new stconn and initialize its main fields. On * failure, nothing is allocated and NULL is returned. It is an internal * function. The caller must, at least, set the SE_FL_ORPHAN or SE_FL_DETACHED @@ -312,15 +364,17 @@ int sc_attach_mux(struct stconn *sc, void *sd, void *ctx) * removed. This function is called by a stream when a backend applet is * registered. 
*/ -static void sc_attach_applet(struct stconn *sc, void *sd) +static int sc_attach_applet(struct stconn *sc, struct appctx *appctx) { - sc->sedesc->se = sd; + sc->sedesc->se = appctx; sc_ep_set(sc, SE_FL_T_APPLET); sc_ep_clr(sc, SE_FL_DETACHED); if (sc_strm(sc)) { sc->app_ops = &sc_app_applet_ops; xref_create(&sc->sedesc->xref, &sc_opposite(sc)->sedesc->xref); } + + return 0; } /* Attaches a stconn to a app layer and sets the relevant @@ -402,7 +456,7 @@ static void sc_detach_endp(struct stconn **scp) sc_ep_set(sc, SE_FL_ORPHAN); sc->sedesc->sc = NULL; sc->sedesc = NULL; - appctx_shut(appctx); + se_shutdown(appctx->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); appctx_free(appctx); } @@ -506,7 +560,10 @@ struct appctx *sc_applet_create(struct stconn *sc, struct applet *app) appctx = appctx_new_here(app, sc->sedesc); if (!appctx) return NULL; - sc_attach_applet(sc, appctx); + if (sc_attach_applet(sc, appctx) == -1) { + appctx_free_on_early_error(appctx); + return NULL; + } appctx->t->nice = __sc_strm(sc)->task->nice; applet_need_more_data(appctx); appctx_wakeup(appctx); @@ -612,21 +669,24 @@ static void sc_app_shut(struct stconn *sc) !(ic->flags & CF_DONT_READ)) return; - __fallthrough; + sc->state = SC_ST_DIS; + break; case SC_ST_CON: case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: /* Note that none of these states may happen with applets */ sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; + /* note that if the task exists, it must unregister itself once it runs */ if (!(sc->flags & SC_FL_DONT_WAKE)) task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO); @@ -691,7 +751,7 @@ static void sc_app_abort_conn(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - sc_conn_shut(sc); + 
se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -725,51 +785,42 @@ static void sc_app_shut_conn(struct stconn *sc) switch (sc->state) { case SC_ST_RDY: case SC_ST_EST: + /* we have to shut before closing, otherwise some short messages * may never leave the system, especially when there are remaining * unread data in the socket input buffer, or when nolinger is set. * However, if SC_FL_NOLINGER is explicitly set, we know there is * no risk so we close both sides immediately. */ - if (sc->flags & SC_FL_NOLINGER) { - /* unclean data-layer shutdown, typically an aborted request - * or a forwarded shutdown from a client to a server due to - * option abortonclose. No need for the TLS layer to try to - * emit a shutdown message. - */ - sc_conn_shutw(sc, CO_SHW_SILENT); + if (!(sc->flags & (SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) && !(ic->flags & CF_DONT_READ)) { + se_shutdown(sc->sedesc, SE_SHW_NORMAL); + return; } - else { - /* clean data-layer shutdown. This only happens on the - * frontend side, or on the backend side when forwarding - * a client close in TCP mode or in HTTP TUNNEL mode - * while option abortonclose is set. We want the TLS - * layer to try to signal it to the peer before we close. - */ - sc_conn_shutw(sc, CO_SHW_NORMAL); - if (!(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(ic->flags & CF_DONT_READ)) - return; - } + se_shutdown(sc->sedesc, SE_SHR_RESET|((sc->flags & SC_FL_NOLINGER) ? 
SE_SHW_SILENT : SE_SHW_NORMAL)); + sc->state = SC_ST_DIS; + break; - __fallthrough; case SC_ST_CON: /* we may have to close a pending connection, and mark the * response buffer as abort */ - sc_conn_shut(sc); - __fallthrough; + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); + sc->state = SC_ST_DIS; + break; case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; } /* This function is used for inter-stream connector calls. It is called by the @@ -884,7 +935,7 @@ static void sc_app_abort_applet(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -920,6 +971,7 @@ static void sc_app_shut_applet(struct stconn *sc) switch (sc->state) { case SC_ST_RDY: case SC_ST_EST: + /* we have to shut before closing, otherwise some short messages * may never leave the system, especially when there are remaining * unread data in the socket input buffer, or when nolinger is set. @@ -927,24 +979,31 @@ static void sc_app_shut_applet(struct stconn *sc) * no risk so we close both sides immediately. 
*/ if (!(sc->flags & (SC_FL_ERROR|SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) && - !(ic->flags & CF_DONT_READ)) + !(ic->flags & CF_DONT_READ)) { + se_shutdown(sc->sedesc, SE_SHW_NORMAL); return; + } + + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); + sc->state = SC_ST_DIS; + break; - __fallthrough; case SC_ST_CON: case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: /* Note that none of these states may happen with applets */ - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; } /* chk_rcv function for applets */ @@ -1095,6 +1154,7 @@ void sc_notify(struct stconn *sc) */ if (sc_ep_have_ff_data(sc_opposite(sc)) || (co_data(ic) && sc_ep_test(sco, SE_FL_WAIT_DATA) && + (!HAS_DATA_FILTERS(__sc_strm(sc), ic) || channel_input_data(ic) == 0) && (!(sc->flags & SC_FL_SND_EXP_MORE) || channel_full(ic, co_data(ic)) || channel_input_data(ic) == 0))) { int new_len, last_len; @@ -1185,7 +1245,6 @@ static void sc_conn_eos(struct stconn *sc) if (sc_cond_forward_shut(sc)) { /* we want to immediately forward this close to the write side */ /* force flag on ssl to keep stream in cache */ - sc_conn_shutw(sc, CO_SHW_SILENT); goto do_close; } @@ -1194,7 +1253,7 @@ static void sc_conn_eos(struct stconn *sc) do_close: /* OK we completely close the socket here just as if we went through sc_shut[rw]() */ - sc_conn_shut(sc); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); sc->flags &= ~SC_FL_SHUT_WANTED; sc->flags |= SC_FL_SHUT_DONE; @@ -1253,17 +1312,7 @@ int sc_conn_recv(struct stconn *sc) /* prepare to detect if the mux needs more room */ sc_ep_clr(sc, SE_FL_WANT_ROOM); - if ((ic->flags & (CF_STREAMER | 
CF_STREAMER_FAST)) && !co_data(ic) && - global.tune.idle_timer && - (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) { - /* The buffer was empty and nothing was transferred for more - * than one second. This was caused by a pause and not by - * congestion. Reset any streaming mode to reduce latency. - */ - ic->xfer_small = 0; - ic->xfer_large = 0; - ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); - } + channel_check_idletimer(ic); #if defined(USE_LINUX_SPLICE) /* Detect if the splicing is possible depending on the stream policy */ @@ -1448,41 +1497,7 @@ int sc_conn_recv(struct stconn *sc) if (!cur_read) se_have_no_more_data(sc->sedesc); else { - if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && - (cur_read <= ic->buf.size / 2)) { - ic->xfer_large = 0; - ic->xfer_small++; - if (ic->xfer_small >= 3) { - /* we have read less than half of the buffer in - * one pass, and this happened at least 3 times. - * This is definitely not a streamer. - */ - ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); - } - else if (ic->xfer_small >= 2) { - /* if the buffer has been at least half full twice, - * we receive faster than we send, so at least it - * is not a "fast streamer". - */ - ic->flags &= ~CF_STREAMER_FAST; - } - } - else if (!(ic->flags & CF_STREAMER_FAST) && (cur_read >= channel_data_limit(ic))) { - /* we read a full buffer at once */ - ic->xfer_small = 0; - ic->xfer_large++; - if (ic->xfer_large >= 3) { - /* we call this buffer a fast streamer if it manages - * to be filled in one call 3 consecutive times. 
- */ - ic->flags |= (CF_STREAMER | CF_STREAMER_FAST); - } - } - else { - ic->xfer_small = 0; - ic->xfer_large = 0; - } - ic->last_read = now_ms; + channel_check_xfer(ic, cur_read); sc_ep_report_read_activity(sc); } @@ -1660,7 +1675,7 @@ int sc_conn_send(struct stconn *sc) if (s->txn->req.msg_state != HTTP_MSG_DONE) s->txn->flags &= ~TX_L7_RETRY; else { - if (b_alloc(&s->txn->l7_buffer) == NULL) + if (b_alloc(&s->txn->l7_buffer, DB_UNLIKELY) == NULL) s->txn->flags &= ~TX_L7_RETRY; else { memcpy(b_orig(&s->txn->l7_buffer), @@ -1673,6 +1688,9 @@ int sc_conn_send(struct stconn *sc) } } + if ((sc->flags & SC_FL_SHUT_WANTED) && co_data(oc) == c_data(oc)) + send_flag |= CO_SFL_LAST_DATA; + ret = conn->mux->snd_buf(sc, &oc->buf, co_data(oc), send_flag); if (ret > 0) { did_send = 1; @@ -1899,7 +1917,7 @@ static void sc_applet_eos(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -1908,6 +1926,352 @@ static void sc_applet_eos(struct stconn *sc) return sc_app_shut_applet(sc); } +/* + * This is the callback which is called by the applet layer to receive data into + * the buffer from the appctx. It iterates over the applet's rcv_buf + * function. Please do not statify this function, it's often present in + * backtraces, it's useful to recognize it. + */ +int sc_applet_recv(struct stconn *sc) +{ + struct appctx *appctx = __sc_appctx(sc); + struct channel *ic = sc_ic(sc); + int ret, max, cur_read = 0; + int read_poll = MAX_READ_POLL_LOOPS; + int flags = 0; + + + /* If another call to sc_applet_recv() failed, give up now. 
+ */ + if (sc_waiting_room(sc)) + return 0; + + /* maybe we were called immediately after an asynchronous abort */ + if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) + return 1; + + /* We must wait because the applet is not fully initialized */ + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + /* stop immediately on errors. */ + if (!sc_ep_test(sc, SE_FL_RCV_MORE)) { + // TODO: be sure SE_FL_RCV_MORE may be set for applet ? + if (sc_ep_test(sc, SE_FL_ERROR)) + goto end_recv; + } + + /* prepare to detect if the mux needs more room */ + sc_ep_clr(sc, SE_FL_WANT_ROOM); + + channel_check_idletimer(ic); + + /* First, let's see if we may fast-forward data from a side to the other + * one without using the channel buffer. + */ + if (sc_is_fastfwd_supported(sc)) { + if (channel_data(ic)) { + /* We're embarrassed, there are already data pending in + * the buffer and we don't want to have them at two + * locations at a time. Let's indicate we need some + * place and ask the consumer to hurry. + */ + flags |= CO_RFL_BUF_FLUSH; + goto abort_fastfwd; + } + ret = appctx_fastfwd(sc, ic->to_forward, flags); + if (ret < 0) + goto abort_fastfwd; + else if (ret > 0) { + if (ic->to_forward != CHN_INFINITE_FORWARD) + ic->to_forward -= ret; + ic->total += ret; + cur_read += ret; + ic->flags |= CF_READ_EVENT; + } + + if (sc_ep_test(sc, SE_FL_EOS | SE_FL_ERROR)) + goto end_recv; + + if (sc_ep_test(sc, SE_FL_WANT_ROOM)) + sc_need_room(sc, -1); + + if (sc_ep_test(sc, SE_FL_MAY_FASTFWD_PROD) && ic->to_forward) + goto done_recv; + } + + abort_fastfwd: + if (!sc_alloc_ibuf(sc, &appctx->buffer_wait)) + goto end_recv; + + /* For an HTX stream, if the buffer is stuck (no output data with some + * input data) and if the HTX message is fragmented or if its free space + * wraps, we force an HTX deframentation. It is a way to have a + * contiguous free space nad to let the mux to copy as much data as + * possible. 
+ * + * NOTE: A possible optim may be to let the mux decides if defrag is + * required or not, depending on amount of data to be xferred. + */ + if (IS_HTX_STRM(__sc_strm(sc)) && !co_data(ic)) { + struct htx *htx = htxbuf(&ic->buf); + + if (htx_is_not_empty(htx) && ((htx->flags & HTX_FL_FRAGMENTED) || htx_space_wraps(htx))) + htx_defrag(htx, NULL, 0); + } + + /* Compute transient CO_RFL_* flags */ + if (co_data(ic)) { + flags |= (CO_RFL_BUF_WET | CO_RFL_BUF_NOT_STUCK); + } + + /* <max> may be null. This is the mux responsibility to set + * SE_FL_RCV_MORE on the SC if more space is needed. + */ + max = channel_recv_max(ic); + ret = appctx_rcv_buf(sc, &ic->buf, max, flags); + if (sc_ep_test(sc, SE_FL_WANT_ROOM)) { + /* SE_FL_WANT_ROOM must not be reported if the channel's + * buffer is empty. + */ + BUG_ON(c_empty(ic)); + + sc_need_room(sc, channel_recv_max(ic) + 1); + /* Add READ_PARTIAL because some data are pending but + * cannot be xferred to the channel + */ + ic->flags |= CF_READ_EVENT; + sc_ep_report_read_activity(sc); + } + + if (ret <= 0) { + /* if we refrained from reading because we asked for a flush to + * satisfy rcv_pipe(), report that there's not enough room here + * to proceed. + */ + if (flags & CO_RFL_BUF_FLUSH) + sc_need_room(sc, -1); + goto done_recv; + } + + cur_read += ret; + + /* if we're allowed to directly forward data, we must update ->o */ + if (ic->to_forward && !(sc_opposite(sc)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) { + unsigned long fwd = ret; + if (ic->to_forward != CHN_INFINITE_FORWARD) { + if (fwd > ic->to_forward) + fwd = ic->to_forward; + ic->to_forward -= fwd; + } + c_adv(ic, fwd); + } + + ic->flags |= CF_READ_EVENT; + ic->total += ret; + + /* End-of-input reached, we can leave. In this case, it is + * important to break the loop to not block the SC because of + * the channel's policies.This way, we are still able to receive + * shutdowns. 
+ */ + if (sc_ep_test(sc, SE_FL_EOI)) + goto done_recv; + + if ((sc->flags & SC_FL_RCV_ONCE) || --read_poll <= 0) { + /* we don't expect to read more data */ + sc_wont_read(sc); + goto done_recv; + } + + /* if too many bytes were missing from last read, it means that + * it's pointless trying to read again because the system does + * not have them in buffers. + */ + if (ret < max) { + /* if a streamer has read few data, it may be because we + * have exhausted system buffers. It's not worth trying + * again. + */ + if (ic->flags & CF_STREAMER) { + /* we're stopped by the channel's policy */ + sc_wont_read(sc); + goto done_recv; + } + + /* if we read a large block smaller than what we requested, + * it's almost certain we'll never get anything more. + */ + if (ret >= global.tune.recv_enough) { + /* we're stopped by the channel's policy */ + sc_wont_read(sc); + } + } + + done_recv: + if (cur_read) { + channel_check_xfer(ic, cur_read); + sc_ep_report_read_activity(sc); + } + + end_recv: + ret = (cur_read != 0); + + /* Report EOI on the channel if it was reached from the mux point of + * view. */ + if (sc_ep_test(sc, SE_FL_EOI) && !(sc->flags & SC_FL_EOI)) { + sc_ep_report_read_activity(sc); + sc->flags |= SC_FL_EOI; + ic->flags |= CF_READ_EVENT; + ret = 1; + } + + if (sc_ep_test(sc, SE_FL_EOS)) { + /* we received a shutdown */ + if (ic->flags & CF_AUTO_CLOSE) + sc_schedule_shutdown(sc_opposite(sc)); + sc_applet_eos(sc); + ret = 1; + } + + if (sc_ep_test(sc, SE_FL_ERROR)) { + sc->flags |= SC_FL_ERROR; + ret = 1; + } + else if (cur_read || (sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))) { + se_have_more_data(sc->sedesc); + ret = 1; + } + + return ret; +} + +/* This tries to perform a synchronous receive on the stream connector to + * try to collect last arrived data. In practice it's only implemented on + * stconns. Returns 0 if nothing was done, non-zero if new data or a + * shutdown were collected. 
This may result on some delayed receive calls + * to be programmed and performed later, though it doesn't provide any + * such guarantee. + */ +int sc_applet_sync_recv(struct stconn *sc) +{ + if (!(__sc_appctx(sc)->flags & APPCTX_FL_INOUT_BUFS)) + return 0; + + if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) + return 0; + + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + if (!sc_is_recv_allowed(sc)) + return 0; // already failed + + return sc_applet_recv(sc); +} + +/* + * This function is called to send buffer data to an applet. It calls the + * applet's snd_buf function. Please do not statify this function, it's often + * present in backtraces, it's useful to recognize it. + */ +int sc_applet_send(struct stconn *sc) +{ + struct stconn *sco = sc_opposite(sc); + struct channel *oc = sc_oc(sc); + size_t ret; + int did_send = 0; + + if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) { + BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING)); + return 1; + } + + if (sc_ep_test(sc, SE_FL_WONT_CONSUME)) + return 0; + + /* we might have been called just after an asynchronous shutw */ + if (sc->flags & SC_FL_SHUT_DONE) + return 1; + + /* We must wait because the applet is not fully initialized */ + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + /* TODO: Splicing is not supported, so it is not possible to have FF data stuck into the I/O buf */ + BUG_ON(sc_ep_have_ff_data(sc)); + + if (co_data(oc)) { + unsigned int send_flag = 0; + + if ((sc->flags & SC_FL_SHUT_WANTED) && co_data(oc) == c_data(oc)) + send_flag |= CO_SFL_LAST_DATA; + + ret = appctx_snd_buf(sc, &oc->buf, co_data(oc), send_flag); + if (ret > 0) { + did_send = 1; + c_rew(oc, ret); + c_realign_if_empty(oc); + + if (!co_data(oc)) { + /* Always clear both flags once everything has been sent, they're one-shot */ + sc->flags &= ~(SC_FL_SND_ASAP|SC_FL_SND_EXP_MORE); + } + /* if some data remain in the buffer, it's only because the + * system buffers 
are full, we will try next time. + */ + } + } + + if (did_send) + oc->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; + + if (!sco->room_needed || (did_send && (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed))) + sc_have_room(sco); + + if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) { + oc->flags |= CF_WRITE_EVENT; + BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING)); + if (sc_ep_test(sc, SE_FL_ERROR)) + sc->flags |= SC_FL_ERROR; + return 1; + } + + if (!co_data(oc)) { + if (did_send) + sc_ep_report_send_activity(sc); + } + else { + sc_ep_report_blocked_send(sc, did_send); + } + + return did_send; +} + +void sc_applet_sync_send(struct stconn *sc) +{ + struct channel *oc = sc_oc(sc); + + oc->flags &= ~CF_WRITE_EVENT; + + if (!(__sc_appctx(sc)->flags & APPCTX_FL_INOUT_BUFS)) + return; + + if (sc->flags & SC_FL_SHUT_DONE) + return; + + if (!co_data(oc)) + return; + + if (!sc_state_in(sc->state, SC_SB_EST)) + return; + + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return; + + sc_applet_send(sc); +} + /* Callback to be used by applet handlers upon completion. It updates the stream * (which may or may not take this opportunity to try to forward data), then * may re-enable the applet's based on the channels and stream connector's final @@ -1960,7 +2324,8 @@ int sc_applet_process(struct stconn *sc) * appctx but in the case the task is not in runqueue we may have to * wakeup the appctx immediately. */ - if (sc_is_recv_allowed(sc) || sc_is_send_allowed(sc)) + if ((sc_is_recv_allowed(sc) && !applet_fl_test(__sc_appctx(sc), APPCTX_FL_OUTBLK_ALLOC)) || + (sc_is_send_allowed(sc) && !applet_fl_test(__sc_appctx(sc), APPCTX_FL_INBLK_ALLOC))) appctx_wakeup(__sc_appctx(sc)); return 0; } @@ -2036,6 +2401,57 @@ smp_fetch_sid(const struct arg *args, struct sample *smp, const char *kw, void * return 1; } +/* return 1 if the frontend or backend mux stream has received an abort and 0 otherwise. 
+ */ +static int +smp_fetch_strm_aborted(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stconn *sc; + unsigned int aborted = 0; + + if (!smp->strm) + return 0; + + sc = (kw[0] == 'f' ? smp->strm->scf : smp->strm->scb); + if (sc->sedesc->abort_info.info) + aborted = 1; + + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_BOOL; + smp->data.u.sint = aborted; + + return 1; +} + +/* return the H2/QUIC RESET code of the frontend or backend mux stream. Any value + * means a RST_STREAM was received on H2 and a STOP_SENDING on QUIC. Otherwise the sample fetch fails. + */ +static int +smp_fetch_strm_rst_code(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stconn *sc; + unsigned int source; + unsigned long long code = 0; + + if (!smp->strm) + return 0; + + sc = (kw[0] == 'f' ? smp->strm->scf : smp->strm->scb); + source = ((sc->sedesc->abort_info.info & SE_ABRT_SRC_MASK) >> SE_ABRT_SRC_SHIFT); + if (source != SE_ABRT_SRC_MUX_H2 && source != SE_ABRT_SRC_MUX_QUIC) { + if (!source) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + code = sc->sedesc->abort_info.code; + + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = code; + + return 1; +} + /* Note: must not be declared <const> as its list will be overwritten. * Note: fetches that may return multiple types should be declared using the * appropriate pseudo-type.
If not available it must be declared as the lowest @@ -2043,7 +2459,11 @@ smp_fetch_sid(const struct arg *args, struct sample *smp, const char *kw, void * */ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "bs.id", smp_fetch_sid, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ }, + { "bs.aborted", smp_fetch_strm_aborted, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, + { "bs.rst_code", smp_fetch_strm_rst_code, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "fs.id", smp_fetch_sid, 0, NULL, SMP_T_STR, SMP_USE_L6RES }, + { "fs.aborted", smp_fetch_strm_aborted, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, + { "fs.rst_code", smp_fetch_strm_rst_code, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, { /* END */ }, }}; diff --git a/src/stick_table.c b/src/stick_table.c index b1ce9d4..08a22e4 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -46,6 +46,18 @@ #include <haproxy/tools.h> #include <haproxy/xxhash.h> +#if defined(USE_PROMEX) +#include <promex/promex.h> +#endif + +/* stick table base fields */ +enum sticktable_field { + STICKTABLE_SIZE = 0, + STICKTABLE_USED, + /* must always be the last one */ + STICKTABLE_TOTAL_FIELDS +}; + /* structure used to return a table key built from a sample */ static THREAD_LOCAL struct stktable_key static_table_key; @@ -98,15 +110,27 @@ void __stksess_free(struct stktable *t, struct stksess *ts) */ void stksess_free(struct stktable *t, struct stksess *ts) { + uint shard; + size_t len; void *data; + data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY); if (data) { dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict)); stktable_data_cast(data, std_t_dict) = NULL; } - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + __stksess_free(t, ts); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, 
&t->lock); } /* @@ -115,17 +139,25 @@ void stksess_free(struct stktable *t, struct stksess *ts) */ int __stksess_kill(struct stktable *t, struct stksess *ts) { + int updt_locked = 0; + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) return 0; - eb32_delete(&ts->exp); if (ts->upd.node.leaf_p) { + updt_locked = 1; HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + goto out_unlock; } + eb32_delete(&ts->exp); + eb32_delete(&ts->upd); ebmb_delete(&ts->key); __stksess_free(t, ts); + + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); return 1; } @@ -136,14 +168,26 @@ int __stksess_kill(struct stktable *t, struct stksess *ts) */ int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt) { + uint shard; + size_t len; int ret; if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) return 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); ret = __stksess_kill(t, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return ret; } @@ -203,6 +247,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) memset((void *)ts - t->data_size, 0, t->data_size); ts->ref_cnt = 0; ts->shard = 0; + ts->seen = 0; ts->key.node.leaf_p = NULL; ts->exp.node.leaf_p = NULL; ts->upd.node.leaf_p = NULL; @@ -215,100 +260,124 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) * Trash oldest <to_batch> sticky sessions from table <t> * Returns number of trashed sticky sessions. 
It may actually trash less * than expected if finding these requires too long a search time (e.g. - * most of them have ts->ref_cnt>0). + * most of them have ts->ref_cnt>0). This function locks the table. */ -int __stktable_trash_oldest(struct stktable *t, int to_batch) +int stktable_trash_oldest(struct stktable *t, int to_batch) { struct stksess *ts; struct eb32_node *eb; int max_search = to_batch * 2; // no more than 50% misses + int max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS; + int done_per_shard; int batched = 0; - int looped = 0; + int updt_locked; + int looped; + int shard; - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); + shard = 0; while (batched < to_batch) { + done_per_shard = 0; + looped = 0; + updt_locked = 0; + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + while (batched < to_batch && done_per_shard < max_per_shard) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. 
- */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) + if (--max_search < 0) break; - } - if (--max_search < 0) - break; + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + eb32_delete(&ts->exp); - eb32_delete(&ts->exp); + if (ts->expire != ts->exp.key) { + if (!tick_isset(ts->expire)) + continue; - if (ts->expire != ts->exp.key) { - if (!tick_isset(ts->expire)) - continue; + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. + */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. 
- */ - if (!eb) - eb = eb32_first(&t->exps); + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; + continue; + } - continue; - } + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + __stksess_free(t, ts); + batched++; + done_per_shard++; } - __stksess_free(t, ts); - batched++; - } - return batched; -} + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); -/* - * Trash oldest <to_batch> sticky sessions from table <t> - * Returns number of trashed sticky sessions. - * This function locks the table - */ -int stktable_trash_oldest(struct stktable *t, int to_batch) -{ - int ret; + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ret = __stktable_trash_oldest(t, to_batch); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + if (max_search <= 0) + break; - return ret; + shard = (shard + 1) % CONFIG_HAP_TBL_BUCKETS; + if (!shard) + break; + } + + return batched; } + /* * Allocate and initialise a new sticky session. * The new sticky session is returned or NULL in case of lack of memory. 
@@ -346,17 +415,17 @@ struct stksess *stksess_new(struct stktable *t, struct stktable_key *key) } /* - * Looks in table <t> for a sticky session matching key <key>. + * Looks in table <t> for a sticky session matching key <key> in shard <shard>. * Returns pointer on requested sticky session or NULL if none was found. */ -struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key) +struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1); + eb = ebst_lookup_len(&t->shards[shard].keys, key->key, key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1); else - eb = ebmb_lookup(&t->keys, key->key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, key->key, t->key_size); if (unlikely(!eb)) { /* no session found */ @@ -375,12 +444,60 @@ struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *k struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key) { struct stksess *ts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1; + else + len = t->key_size; - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - ts = __stktable_lookup_key(t, key); + shard = stktable_calc_shard_num(t, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + ts = __stktable_lookup_key(t, key, shard); if (ts) HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + return ts; +} + +/* + * Looks in table <t> for a sticky session matching ptr <ptr>. + * Returns pointer on requested sticky session or NULL if none was found. 
+ * The refcount of the found entry is increased and this function + * is protected using the table lock + */ +struct stksess *stktable_lookup_ptr(struct stktable *t, void *ptr) +{ + struct stksess *ts = NULL; + struct ebmb_node *eb; + int shard; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + /* linear search is performed, this could be optimized by adding + * an eb node dedicated to ptr lookups into stksess struct to + * leverage eb_lookup function instead. + */ + eb = ebmb_first(&t->shards[shard].keys); + while (eb) { + struct stksess *cur; + + cur = ebmb_entry(eb, struct stksess, key); + if (cur == ptr) { + ts = cur; + break; + } + eb = ebmb_next(eb); + } + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + if (ts) + return ts; + } return ts; } @@ -389,14 +506,14 @@ struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key * Looks in table <t> for a sticky session with same key as <ts>. * Returns pointer on requested sticky session or NULL if none was found. 
*/ -struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup(&(t->keys), (char *)ts->key.key); + eb = ebst_lookup(&t->shards[shard].keys, (char *)ts->key.key); else - eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, ts->key.key, t->key_size); if (unlikely(!eb)) return NULL; @@ -413,12 +530,21 @@ struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) { struct stksess *lts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - lts = __stktable_lookup(t, ts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + lts = __stktable_lookup(t, ts, shard); if (lts) HA_ATOMIC_INC(&lts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return lts; } @@ -428,7 +554,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) * The node will be also inserted into the update tree if needed, at a position * depending if the update is a local or coming from a remote node. * If <decrefcnt> is set, the ts entry's ref_cnt will be decremented. The table's - * write lock may be taken. + * updt_lock may be taken for writes. */ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt) { @@ -444,39 +570,18 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* If sync is enabled */ if (t->sync_task) { - try_lock_again: - /* We'll need to reliably check that the entry is in the tree.
- * It's only inserted/deleted using a write lock so a read lock - * is sufficient to verify this. We may then need to upgrade it - * to perform an update (which is rare under load), and if the - * upgrade fails, we'll try again with a write lock directly. - */ - if (use_wrlock) - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->updt_lock); - if (local) { /* Check if this entry is not in the tree or not * scheduled for at least one peer. */ - if (!ts->upd.node.leaf_p - || (int)(t->commitupdate - ts->upd.key) >= 0 - || (int)(ts->upd.key - t->localupdate) >= 0) { - /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) { + /* Time to upgrade the read lock to write lock */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ + ts->seen = 0; ts->upd.key = ++t->update; t->localupdate = t->update; eb32_delete(&ts->upd); @@ -489,28 +594,30 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, do_wakeup = 1; } else { - /* If this entry is not in the tree */ - + /* Note: we land here when learning new entries from + * remote peers. We hold one ref_cnt so the entry + * cannot vanish under us, however if two peers create + * the same key at the exact same time, we must be + * careful not to perform two parallel inserts! Hence + * we need to first check leaf_p to know if the entry + * is new, then lock the tree and check the entry again + * (since another thread could have created it in the + * mean time). 
+ */ if (!ts->upd.node.leaf_p) { /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ - - ts->upd.key= (++t->update)+(2147483648U); - eb = eb32_insert(&t->updates, &ts->upd); - if (eb != &ts->upd) { - eb32_delete(eb); - eb32_insert(&t->updates, &ts->upd); + if (!ts->upd.node.leaf_p) { + ts->seen = 0; + ts->upd.key= (++t->update)+(2147483648U); + eb = eb32_insert(&t->updates, &ts->upd); + if (eb != &ts->upd) { + eb32_delete(eb); + eb32_insert(&t->updates, &ts->upd); + } } } } @@ -518,8 +625,6 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* drop the lock now */ if (use_wrlock) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); } if (decrefcnt) @@ -569,14 +674,14 @@ static void stktable_release(struct stktable *t, struct stksess *ts) * is set. <ts> is returned if properly inserted, otherwise the one already * present if any. 
*/ -struct stksess *__stktable_store(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_store(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; - eb = ebmb_insert(&t->keys, &ts->key, t->key_size); + eb = ebmb_insert(&t->shards[shard].keys, &ts->key, t->key_size); if (likely(eb == &ts->key)) { ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + eb32_insert(&t->shards[shard].exps, &ts->exp); } return ebmb_entry(eb, struct stksess, key); // most commonly this is <ts> } @@ -621,11 +726,24 @@ void stktable_requeue_exp(struct stktable *t, const struct stksess *ts) struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key) { struct stksess *ts, *ts2; + uint shard; + size_t len; if (!key) return NULL; - ts = stktable_lookup_key(table, key); + if (table->type == SMP_T_STR) + len = key->key_len + 1 < table->key_size ? key->key_len : table->key_size - 1; + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup_key(table, key, shard); + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (ts) return ts; @@ -645,12 +763,12 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * * one we find. 
*/ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); - ts2 = __stktable_store(table, ts); + ts2 = __stktable_store(table, ts, shard); HA_ATOMIC_INC(&ts2->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (unlikely(ts2 != ts)) { /* another entry was added in the mean time, let's @@ -671,12 +789,21 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) { struct stksess *ts; + uint shard; + size_t len; + + if (table->type == SMP_T_STR) + len = strlen((const char *)nts->key.key); + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, nts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->lock); - ts = __stktable_lookup(table, nts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup(table, nts, shard); if (ts) { HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); return ts; } ts = nts; @@ -684,18 +811,18 @@ struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) /* let's increment it before switching to exclusive */ HA_ATOMIC_INC(&ts->ref_cnt); - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->lock) != 0) { + if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->shards[shard].sh_lock) != 0) { /* upgrade to seek lock failed, let's drop and take */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); } else - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->shards[shard].sh_lock); /* now we're write-locked */ - 
__stktable_store(table, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + __stktable_store(table, ts, shard); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); stktable_requeue_exp(table, ts); return ts; @@ -710,87 +837,117 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int struct stktable *t = context; struct stksess *ts; struct eb32_node *eb; - int updt_locked = 0; - int looped = 0; + int updt_locked; + int looped; int exp_next; + int task_exp; + int shard; + + task_exp = TICK_ETERNITY; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + updt_locked = 0; + looped = 0; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + + while (1) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); - - while (1) { - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. 
- */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) - break; - } - - if (likely(tick_is_lt(now_ms, eb->key))) { - /* timer not expired yet, revisit it later */ - exp_next = eb->key; - goto out_unlock; - } + if (likely(tick_is_lt(now_ms, eb->key))) { + /* timer not expired yet, revisit it later */ + exp_next = eb->key; + goto out_unlock; + } - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - eb32_delete(&ts->exp); + eb32_delete(&ts->exp); - if (!tick_is_expired(ts->expire, now_ms)) { - if (!tick_isset(ts->expire)) - continue; + if (!tick_is_expired(ts->expire, now_ms)) { + if (!tick_isset(ts->expire)) + continue; - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. - */ - if (!eb) - eb = eb32_first(&t->exps); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. 
+ */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; - continue; - } + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - if (!updt_locked) { - updt_locked = 1; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; } + + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); + __stksess_free(t, ts); } - __stksess_free(t, ts); - } - /* We have found no task to expire in any tree */ - exp_next = TICK_ETERNITY; + /* We have found no task to expire in any tree */ + exp_next = TICK_ETERNITY; -out_unlock: - task->expire = exp_next; - if (updt_locked) - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + + task_exp = tick_first(task_exp, exp_next); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + } + + /* Reset the task's expiration. We do this under the lock so as not + * to ruin a call to task_queue() in stktable_requeue_exp() if we + * were to update with TICK_ETERNITY. 
+ */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + task->expire = task_exp; HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + return task; } @@ -803,12 +960,17 @@ out_unlock: int stktable_init(struct stktable *t, char **err_msg) { int peers_retval = 0; + int shard; t->hash_seed = XXH64(t->id, t->idlen, 0); if (t->size) { - t->keys = EB_ROOT_UNIQUE; - memset(&t->exps, 0, sizeof(t->exps)); + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + t->shards[shard].keys = EB_ROOT_UNIQUE; + memset(&t->shards[shard].exps, 0, sizeof(t->shards[shard].exps)); + HA_RWLOCK_INIT(&t->shards[shard].sh_lock); + } + t->updates = EB_ROOT_UNIQUE; HA_RWLOCK_INIT(&t->lock); @@ -1402,6 +1564,8 @@ struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = { [STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 }, [STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 }, [STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY }, + [STKTABLE_DT_GLITCH_CNT] = { .name = "glitch_cnt", .std_type = STD_T_UINT }, + [STKTABLE_DT_GLITCH_RATE] = { .name = "glitch_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY }, }; /* Registers stick-table extra data type with index <idx>, name <name>, type @@ -1741,6 +1905,79 @@ static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct samp return !!ptr; } +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the cumulated number of front glitches for the + * key if the key is present in the table, otherwise zero, so that comparisons + * can be easily performed. If the inspected parameter is not stored in the + * table, <not found> is returned. 
+ */ +static int sample_conv_table_glitch_cnt(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_CNT); + if (ptr) + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + stktable_release(t, ts); + return !!ptr; +} + +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the front glitch rate the key if the key is + * present in the table, otherwise zero, so that comparisons can be easily + * performed. If the inspected parameter is not stored in the table, <not found> + * is returned. + */ +static int sample_conv_table_glitch_rate(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_RATE); + if (ptr) + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + t->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + stktable_release(t, ts); + return !!ptr; +} + /* Casts sample <smp> to the type of the table specified in arg_p(1), and looks * it up into this table. 
Returns the value of the GPT[arg_p(0)] tag for the key * if the key is present in the table, otherwise false, so that comparisons can @@ -4218,6 +4455,85 @@ smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw return 1; } +/* set <smp> to the cumulated number of glitches from the stream or session's + * tracked frontend counters. Supports being called as "sc[0-9]_glitch_cnt" or + * "src_glitch_cnt" only. + */ +static int +smp_fetch_sc_glitch_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_CNT); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + +/* set <smp> to the rate of glitches from the stream or session's tracked + * frontend counters. Supports being called as "sc[0-9]_glitch_rate" or + * "src_glitch_rate" only. 
+ */ +static int +smp_fetch_sc_glitch_rate(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_RATE); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + /* set <smp> to the cumulated number of streams from the stream's tracked * frontend counters. Supports being called as "sc[0-9]_sess_cnt" or * "src_sess_cnt" only. @@ -4885,6 +5201,7 @@ struct show_table_ctx { void *target; /* table we want to dump, or NULL for all */ struct stktable *t; /* table being currently dumped (first if NULL) */ struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */ + int tree_head; /* tree head currently being visited */ long long value[STKTABLE_FILTER_LEN]; /* value to compare against */ signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */ signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */ @@ -4896,39 +5213,22 @@ struct show_table_ctx { char action; /* action on the table : one of STK_CLI_ACT_* */ }; -/* Processes a single table entry matching a specific key passed in argument. 
- * returns 0 if wants to be called again, 1 if has ended processing. +/* Processes a single table entry <ts>. + * returns 0 if it wants to be called again, 1 if has ended processing. */ -static int table_process_entry_per_key(struct appctx *appctx, char **args) +static int table_process_entry(struct appctx *appctx, struct stksess *ts, char **args) { struct show_table_ctx *ctx = appctx->svcctx; struct stktable *t = ctx->target; - struct stksess *ts; - struct sample key; long long value; int data_type; int cur_arg; void *ptr; struct freq_ctr *frqp; - if (!*args[4]) - return cli_err(appctx, "Key value expected\n"); - - memset(&key, 0, sizeof(key)); - key.data.type = SMP_T_STR; - key.data.u.str.area = args[4]; - key.data.u.str.data = strlen(args[4]); - switch (t->type) { case SMP_T_IPV4: case SMP_T_IPV6: - /* prefer input format over table type when parsing ip addresses, - * then let smp_to_stkey() do the conversion for us when needed - */ - BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); - if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) - return cli_err(appctx, "Invalid key\n"); - break; case SMP_T_SINT: case SMP_T_STR: break; @@ -4945,21 +5245,15 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) } } - /* try to convert key according to table type - * (it will fill static_table_key on success) - */ - if (!smp_to_stkey(&key, t)) - return cli_err(appctx, "Invalid key\n"); - /* check permissions */ if (!cli_has_level(appctx, ACCESS_LVL_OPER)) return 1; + if (!ts) + return 1; + switch (ctx->action) { case STK_CLI_ACT_SHOW: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; chunk_reset(&trash); if (!table_dump_head_to_buffer(&trash, appctx, t, t)) { stktable_release(t, ts); @@ -4976,10 +5270,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_CLR: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; - if (!stksess_kill(t, ts, 1)) { /* don't 
delete an entry which is currently referenced */ return cli_err(appctx, "Entry currently in use, cannot remove\n"); @@ -4987,11 +5277,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_SET: - ts = stktable_get_entry(t, &static_table_key); - if (!ts) { - /* don't delete an entry which is currently referenced */ - return cli_err(appctx, "Unable to allocate a new entry\n"); - } HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) { if (strncmp(args[cur_arg], "data.", 5) != 0) { @@ -5023,7 +5308,7 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return 1; } - ptr = stktable_data_ptr(t, ts, data_type); + ptr = __stktable_data_ptr(t, ts, data_type); switch (stktable_data_types[data_type].std_type) { case STD_T_SINT: @@ -5060,6 +5345,82 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return cli_err(appctx, "Unknown action\n"); } return 1; + +} + +/* Processes a single table entry matching a specific key passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. 
+ */ +static int table_process_entry_per_key(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + struct stksess *ts; + struct sample key; + + if (!*args[4]) + return cli_err(appctx, "Key value expected\n"); + + memset(&key, 0, sizeof(key)); + key.data.type = SMP_T_STR; + key.data.u.str.area = args[4]; + key.data.u.str.data = strlen(args[4]); + + switch (t->type) { + case SMP_T_IPV4: + case SMP_T_IPV6: + /* prefer input format over table type when parsing ip addresses, + * then let smp_to_stkey() do the conversion for us when needed + */ + BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); + if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) + return cli_err(appctx, "Invalid key\n"); + break; + default: + /* nothing to do */ + break; + } + + /* try to convert key according to table type + * (it will fill static_table_key on success) + */ + if (!smp_to_stkey(&key, t)) + return cli_err(appctx, "Invalid key\n"); + + if (ctx->action == STK_CLI_ACT_SET) { + ts = stktable_get_entry(t, &static_table_key); + if (!ts) + return cli_err(appctx, "Unable to allocate a new entry\n"); + } else + ts = stktable_lookup_key(t, &static_table_key); + + return table_process_entry(appctx, ts, args); +} + +/* Processes a single table entry matching a specific ptr passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. 
+ */ +static int table_process_entry_per_ptr(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + ulong ptr; + char *error; + struct stksess *ts; + + if (!*args[4] || args[4][0] != '0' || args[4][1] != 'x') + return cli_err(appctx, "Pointer expected (0xffff notation)\n"); + + /* Convert argument to integer value */ + ptr = strtoul(args[4], &error, 16); + if (*error != '\0') + return cli_err(appctx, "Malformed ptr.\n"); + + ts = stktable_lookup_ptr(t, (void *)ptr); + if (!ts) + return cli_err(appctx, "No entry can be found matching ptr.\n"); + + return table_process_entry(appctx, ts, args); } /* Prepares the appctx fields with the data-based filters from the command line. @@ -5127,6 +5488,8 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx if (strcmp(args[3], "key") == 0) return table_process_entry_per_key(appctx, args); + if (strcmp(args[3], "ptr") == 0) + return table_process_entry_per_ptr(appctx, args); else if (strncmp(args[3], "data.", 5) == 0) return table_prepare_data_request(appctx, args); else if (*args[3]) @@ -5137,11 +5500,11 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx err_args: switch (ctx->action) { case STK_CLI_ACT_SHOW: - return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n"); + return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> or key <key> or ptr <ptr>\n"); case STK_CLI_ACT_CLR: - return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n"); + return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key> or <table> ptr <ptr>\n"); case STK_CLI_ACT_SET: - return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n"); + return cli_err(appctx, 
"Required arguments: <table> key <key> [data.<store_data_type> <value>]* or <table> ptr <ptr> [data.<store_data_type> <value>]*\n"); default: return cli_err(appctx, "Unknown action\n"); } @@ -5159,6 +5522,7 @@ static int cli_io_handler_table(struct appctx *appctx) struct ebmb_node *eb; int skip_entry; int show = ctx->action == STK_CLI_ACT_SHOW; + int shard = ctx->tree_head; /* * We have 3 possible states in ctx->state : @@ -5170,14 +5534,6 @@ static int cli_io_handler_table(struct appctx *appctx) * - STATE_DONE : nothing left to dump, the buffer may contain some * data though. */ - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* in case of abort, remove any refcount we might have set on an entry */ - if (ctx->state == STATE_DUMP) { - stksess_kill_if_expired(ctx->t, ctx->entry, 1); - } - return 1; - } chunk_reset(&trash); @@ -5192,22 +5548,30 @@ static int cli_io_handler_table(struct appctx *appctx) } if (ctx->t->size) { - if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) + if (show && !shard && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) return 0; if (ctx->target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) { /* dump entries only if table explicitly requested */ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); - eb = ebmb_first(&ctx->t->keys); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + eb = ebmb_first(&ctx->t->shards[shard].keys); if (eb) { ctx->entry = ebmb_entry(eb, struct stksess, key); HA_ATOMIC_INC(&ctx->entry->ref_cnt); ctx->state = STATE_DUMP; - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + + /* we come here if we didn't find any entry in this shard */ + shard = ++ctx->tree_head; + if (shard < 
CONFIG_HAP_TBL_BUCKETS) + break; // try again on new shard + + /* fall through next table */ + shard = ctx->tree_head = 0; } } ctx->t = ctx->t->next; @@ -5275,7 +5639,7 @@ static int cli_io_handler_table(struct appctx *appctx) HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ctx->entry->ref_cnt); eb = ebmb_next(&ctx->entry->key); @@ -5287,7 +5651,7 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !ctx->entry->ref_cnt) __stksess_kill(ctx->t, old); HA_ATOMIC_INC(&ctx->entry->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } @@ -5297,9 +5661,13 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt)) __stksess_kill(ctx->t, ctx->entry); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); - ctx->t = ctx->t->next; + shard = ++ctx->tree_head; + if (shard >= CONFIG_HAP_TBL_BUCKETS) { + shard = ctx->tree_head = 0; + ctx->t = ctx->t->next; + } ctx->state = STATE_NEXT; break; @@ -5481,6 +5849,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN }, + { "sc_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc_glitch_rate", smp_fetch_sc_glitch_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, 
ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5509,6 +5879,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5536,6 +5908,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5562,6 +5936,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { 
"sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5591,6 +5967,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, @@ -5632,6 +6010,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc0_rate", sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_cnt", sample_conv_table_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_rate", 
sample_conv_table_glitch_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, @@ -5656,3 +6036,73 @@ static struct cfg_kw_list cfg_kws = {{ },{ }}; INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + + +#if defined(USE_PROMEX) + +static int stk_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc) +{ + switch (id) { + case STICKTABLE_SIZE: + *metric = (struct promex_metric){ .n = ist("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Stick table size."); + break; + case STICKTABLE_USED: + *metric = (struct promex_metric){ .n = ist("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Number of entries used in this stick table."); + break; + default: + return -1; + } + return 1; +} + +static void *stk_promex_start_ts(void *unused, unsigned int id) +{ + return stktables_list; +} + +static void *stk_promex_next_ts(void *unused, void *metric_ctx, unsigned int id) +{ + struct stktable *t = metric_ctx; + + return t->next; +} + +static int stk_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field) +{ + struct stktable *t = metric_ctx; + + if (!t->size) + return 0; + + labels[0].name = ist("name"); + labels[0].value = ist(t->id); + labels[1].name = ist("type"); + labels[1].value = ist(stktable_types[t->type].kw); + + switch (id) { + case STICKTABLE_SIZE: + *field = mkf_u32(FN_GAUGE, t->size); + break; + case STICKTABLE_USED: + *field = mkf_u32(FN_GAUGE, t->current); + break; + default: + return -1; + } + return 1; +} + +static struct promex_module promex_sticktable_module = { + .name = IST("sticktable"), + .metric_info 
= stk_promex_metric_info, + .start_ts = stk_promex_start_ts, + .next_ts = stk_promex_next_ts, + .fill_ts = stk_promex_fill_ts, + .nb_metrics = STICKTABLE_TOTAL_FIELDS, +}; + +INITCALL1(STG_REGISTER, promex_register_module, &promex_sticktable_module); + +#endif diff --git a/src/stream.c b/src/stream.c index e643a6d..ed5c268 100644 --- a/src/stream.c +++ b/src/stream.c @@ -320,15 +320,13 @@ int stream_buf_available(void *arg) { struct stream *s = arg; - if (!s->req.buf.size && !sc_ep_have_ff_data(s->scb) && s->scf->flags & SC_FL_NEED_BUFF && - b_alloc(&s->req.buf)) + if (!s->req.buf.size && !sc_ep_have_ff_data(s->scb) && s->scf->flags & SC_FL_NEED_BUFF) sc_have_buff(s->scf); - else if (!s->res.buf.size && !sc_ep_have_ff_data(s->scf) && s->scb->flags & SC_FL_NEED_BUFF && - b_alloc(&s->res.buf)) + + if (!s->res.buf.size && !sc_ep_have_ff_data(s->scf) && s->scb->flags & SC_FL_NEED_BUFF) sc_have_buff(s->scb); - else - return 0; + s->flags |= SF_MAYALLOC; task_wakeup(s->task, TASK_WOKEN_RES); return 1; @@ -632,8 +630,7 @@ void stream_free(struct stream *s) } /* We may still be present in the buffer wait queue */ - if (LIST_INLIST(&s->buffer_wait.list)) - LIST_DEL_INIT(&s->buffer_wait.list); + b_dequeue(&s->buffer_wait); if (s->req.buf.size || s->res.buf.size) { int count = !!s->req.buf.size + !!s->res.buf.size; @@ -752,8 +749,12 @@ void stream_free(struct stream *s) */ static int stream_alloc_work_buffer(struct stream *s) { - if (b_alloc(&s->res.buf)) + if (b_alloc(&s->res.buf, DB_CHANNEL | ((s->flags & SF_MAYALLOC) ? DB_F_NOQUEUE : 0))) { + s->flags &= ~SF_MAYALLOC; return 1; + } + + b_requeue(DB_CHANNEL, &s->buffer_wait); return 0; } @@ -920,7 +921,7 @@ void back_establish(struct stream *s) if (!IS_HTX_STRM(s)) { /* let's allow immediate data connection in this case */ /* if the user wants to log as soon as possible, without counting * bytes from the server, then this is the right moment. 
*/ - if (!LIST_ISEMPTY(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) { + if (!lf_expr_isempty(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) { /* note: no pend_pos here, session is established */ s->logs.t_close = s->logs.t_connect; /* to get a valid end date */ s->do_log(s); @@ -1736,8 +1737,8 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) scb = s->scb; /* First, attempt to receive pending data from I/O layers */ - sc_conn_sync_recv(scf); - sc_conn_sync_recv(scb); + sc_sync_recv(scf); + sc_sync_recv(scb); /* Let's check if we're looping without making any progress, e.g. due * to a bogus analyser or the fact that we're ignoring a read0. The @@ -1794,25 +1795,12 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } resync_stconns: - /* below we may emit error messages so we have to ensure that we have - * our buffers properly allocated. If the allocation failed, an error is - * triggered. - * - * NOTE: An error is returned because the mechanism to queue entities - * waiting for a buffer is totally broken for now. However, this - * part must be refactored. When it will be handled, this part - * must be be reviewed too. - */ if (!stream_alloc_work_buffer(s)) { - scf->flags |= SC_FL_ERROR; - s->conn_err_type = STRM_ET_CONN_RES; - - scb->flags |= SC_FL_ERROR; - s->conn_err_type = STRM_ET_CONN_RES; - - if (!(s->flags & SF_ERR_MASK)) - s->flags |= SF_ERR_RESOURCE; - sess_set_term_flags(s); + scf->flags &= ~SC_FL_DONT_WAKE; + scb->flags &= ~SC_FL_DONT_WAKE; + /* we're stuck for now */ + t->expire = TICK_ETERNITY; + goto leave; } /* 1b: check for low-level errors reported at the stream connector. 
@@ -2349,7 +2337,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } /* Let's see if we can send the pending request now */ - sc_conn_sync_send(scb); + sc_sync_send(scb); /* * Now forward all shutdown requests between both sides of the request buffer @@ -2459,7 +2447,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) scf_flags = (scf_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)); /* Let's see if we can send the pending response now */ - sc_conn_sync_send(scf); + sc_sync_send(scf); /* * Now forward all shutdown requests between both sides of the buffer @@ -2552,7 +2540,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) stream_handle_timeouts(s); goto resync_stconns; } - + leave: s->pending_events &= ~(TASK_WOKEN_TIMER | TASK_WOKEN_RES); stream_release_buffers(s); @@ -2597,7 +2585,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } /* let's do a final log if we need it */ - if (!LIST_ISEMPTY(&sess->fe->logformat) && s->logs.logwait && + if (!lf_expr_isempty(&sess->fe->logformat) && s->logs.logwait && !(s->flags & SF_MONITOR) && (!(sess->fe->options & PR_O_NULLNOLOG) || req->total)) { /* we may need to know the position in the queue */ @@ -2847,7 +2835,7 @@ INITCALL0(STG_INIT, init_stream); * If an ID is already stored within the stream nothing happens existing unique ID is * returned. */ -struct ist stream_generate_unique_id(struct stream *strm, struct list *format) +struct ist stream_generate_unique_id(struct stream *strm, struct lf_expr *format) { if (isttest(strm->unique_id)) { return strm->unique_id; @@ -3494,9 +3482,8 @@ void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const ch * buffer is full and it needs to be called again, otherwise non-zero. It is * designed to be called from stats_dump_strm_to_buffer() below. 
*/ -static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm) +static int stats_dump_full_strm_to_buffer(struct appctx *appctx, struct stream *strm) { - struct appctx *appctx = __sc_appctx(sc); struct show_sess_ctx *ctx = appctx->svcctx; chunk_reset(&trash); @@ -3588,7 +3575,6 @@ static int cli_parse_show_sess(char **args, char *payload, struct appctx *appctx static int cli_io_handler_dump_sess(struct appctx *appctx) { struct show_sess_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct connection *conn; thread_isolate(); @@ -3598,18 +3584,6 @@ static int cli_io_handler_dump_sess(struct appctx *appctx) goto done; } - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last stream being dumped. - */ - if (!LIST_ISEMPTY(&ctx->bref.users)) { - LIST_DELETE(&ctx->bref.users); - LIST_INIT(&ctx->bref.users); - } - goto done; - } - chunk_reset(&trash); /* first, let's detach the back-ref from a possible previous stream */ @@ -3666,7 +3640,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx) LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users); /* call the proper dump() function and return if we're missing space */ - if (!stats_dump_full_strm_to_buffer(sc, curr_strm)) + if (!stats_dump_full_strm_to_buffer(appctx, curr_strm)) goto full; /* stream dump complete */ @@ -4036,6 +4010,19 @@ static int smp_fetch_id32(const struct arg *args, struct sample *smp, const char return 1; } +static int smp_fetch_redispatched(const struct arg *args, struct sample *smp, const char *km, void *private) +{ + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_BOOL; + if (!smp->strm) + return 0; + + if (!sc_state_in(smp->strm->scb->state, SC_SB_DIS|SC_SB_CLO)) + smp->flags |= SMP_F_VOL_TEST; + smp->data.u.sint = !!(smp->strm->flags & SF_REDISP); + return 1; +} + /* Note: must not be declared <const> as its 
list will be overwritten. * Please take care of keeping this list alphabetically sorted. */ @@ -4047,6 +4034,7 @@ static struct sample_fetch_kw_list smp_kws = {ILH, { { "last_rule_line", smp_fetch_last_rule_line, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "txn.conn_retries", smp_fetch_conn_retries, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV, }, { "txn.id32", smp_fetch_id32, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "txn.redispatched", smp_fetch_redispatched, 0, NULL, SMP_T_BOOL, SMP_USE_L4SRV, }, { "txn.sess_term_state",smp_fetch_sess_term_state, 0, NULL, SMP_T_STR, SMP_USE_INTRN, }, { NULL, NULL, 0, 0, 0 }, }}; diff --git a/src/systemd.c b/src/systemd.c new file mode 100644 index 0000000..fb36dd9 --- /dev/null +++ b/src/systemd.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: MIT-0 */ + +/* Implement the systemd notify protocol without external dependencies. + * Supports both readiness notification on startup and on reloading, + * according to the protocol defined at: + * https://www.freedesktop.org/software/systemd/man/latest/sd_notify.html + * This protocol is guaranteed to be stable as per: + * https://systemd.io/PORTABILITY_AND_STABILITY/ + * + */ + +#include <errno.h> +#include <inttypes.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <time.h> +#include <unistd.h> +#include <stdarg.h> + +#include <haproxy/tools.h> + +/* + * standalone reimplementation of sd_notify from the libsystemd + * Return: + * -errno in case of error + * 0 when ignored + * >0 when succeeded + * + * Will send <message> over the NOTIFY_SOCKET. + * When unset_environement is set, unsetenv NOTIFY_SOCKET. 
+ */ +int sd_notify(int unset_environment, const char *message) +{ + union sockaddr_union { + struct sockaddr sa; + struct sockaddr_un sun; + } socket_addr = { + .sun.sun_family = AF_UNIX, + }; + int ret = 1; + int fd = -1; + size_t path_length, message_length; + const char *socket_path; + ssize_t written; + + socket_path = getenv("NOTIFY_SOCKET"); + if (!socket_path) { + ret = 0; /* Not running under systemd? Nothing to do */ + goto end; + } + + if (unset_environment) + unsetenv("NOTIFY_SOCKET"); + + if (!message) { + ret = -EINVAL; + goto end; + } + + message_length = strlen(message); + if (message_length == 0) { + ret = -EINVAL; + goto end; + } + + /* Only AF_UNIX is supported, with path or abstract sockets */ + if (socket_path[0] != '/' && socket_path[0] != '@') { + ret = -EAFNOSUPPORT; + goto end; + } + + path_length = strlen(socket_path); + /* Ensure there is room for NUL byte */ + if (path_length >= sizeof(socket_addr.sun.sun_path)) { + ret = -E2BIG; + goto end; + } + + memcpy(socket_addr.sun.sun_path, socket_path, path_length); + + /* Support for abstract socket */ + if (socket_addr.sun.sun_path[0] == '@') + socket_addr.sun.sun_path[0] = 0; + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) { + ret = -errno; + goto end; + } + + if (connect(fd, &socket_addr.sa, offsetof(struct sockaddr_un, sun_path) + path_length) != 0) { + ret = -errno; + goto end; + } + + written = write(fd, message, message_length); + if (written != (ssize_t) message_length) { + ret = written < 0 ? -errno : -EPROTO; + goto end; + } + +end: + if (fd > -1) + close(fd); + return ret; /* Notified! */ +} + +/* va_args variant of sd_notify */ +int sd_notifyf(int unset_environment, const char *format, ...) 
+{ + int r; + va_list args; + char *strp = NULL; + + va_start(args, format); + strp = memvprintf(&strp, format, args); + va_end(args); + + if (strp == NULL) { + r = -ENOMEM; + goto end; + } + + r = sd_notify(unset_environment, strp); + free(strp); +end: + return r; +} + diff --git a/src/tcp_act.c b/src/tcp_act.c index 8b44047..c9c4a5c 100644 --- a/src/tcp_act.c +++ b/src/tcp_act.c @@ -71,6 +71,29 @@ static enum act_return tcp_action_attach_srv(struct act_rule *rule, struct proxy return ACT_RET_CONT; } +/* tries to extract integer value from rule's argument: + * if expr is set, computes expr and sets the result into <value> + * else, it's already a numerical value, use it as-is. + * + * Returns 1 on success and 0 on failure. + */ +static int extract_int_from_rule(struct act_rule *rule, + struct proxy *px, struct session *sess, struct stream *s, + int *value) +{ + struct sample *smp; + + if (!rule->arg.expr_int.expr) { + *value = rule->arg.expr_int.value; + return 1; + } + smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr_int.expr, SMP_T_SINT); + if (!smp) + return 0; + *value = smp->data.u.sint; + return 1; +} + /* * Execute the "set-src" action. May be called from {tcp,http}request. * It only changes the address and tries to preserve the original port. 
If the @@ -389,19 +412,57 @@ static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct #if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE) -static enum act_return tcp_action_set_mark(struct act_rule *rule, struct proxy *px, - struct session *sess, struct stream *s, int flags) +static enum act_return tcp_action_set_fc_mark(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) +{ + unsigned int mark; + + if (extract_int_from_rule(rule, px, sess, s, (int *)&mark)) + conn_set_mark(objt_conn(sess->origin), mark); + return ACT_RET_CONT; +} +static enum act_return tcp_action_set_bc_mark(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) { - conn_set_mark(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]); + struct connection __maybe_unused *conn = (s && s->scb) ? sc_conn(s->scb) : NULL; + unsigned int mark; + + BUG_ON(!s || conn); + if (extract_int_from_rule(rule, px, sess, s, (int *)&mark)) { + /* connection does not exist yet, ensure it will be applied + * before connection is used by saving it within the stream + */ + s->bc_mark = mark; + s->flags |= SF_BC_MARK; + } return ACT_RET_CONT; } #endif #ifdef IP_TOS -static enum act_return tcp_action_set_tos(struct act_rule *rule, struct proxy *px, - struct session *sess, struct stream *s, int flags) +static enum act_return tcp_action_set_fc_tos(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) +{ + int tos; + + if (extract_int_from_rule(rule, px, sess, s, &tos)) + conn_set_tos(objt_conn(sess->origin), tos); + return ACT_RET_CONT; +} +static enum act_return tcp_action_set_bc_tos(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) { - conn_set_tos(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]); + struct connection __maybe_unused *conn = (s && s->scb) ? 
sc_conn(s->scb) : NULL; + int tos; + + BUG_ON(!s || conn); + if (extract_int_from_rule(rule, px, sess, s, &tos)) { + /* connection does not exist yet, ensure it will be applied + * before connection is used by saving it within the stream + */ + s->bc_tos = tos; + s->flags |= SF_BC_TOS; + } return ACT_RET_CONT; } #endif @@ -423,6 +484,14 @@ static void release_set_src_dst_action(struct act_rule *rule) release_sample_expr(rule->arg.expr); } +/* + * Release expr_int rule argument when action is no longer used + */ +static __maybe_unused void release_expr_int_action(struct act_rule *rule) +{ + release_sample_expr(rule->arg.expr_int.expr); +} + static int tcp_check_attach_srv(struct act_rule *rule, struct proxy *px, char **err) { struct proxy *be = NULL; @@ -451,10 +520,16 @@ static int tcp_check_attach_srv(struct act_rule *rule, struct proxy *px, char ** return 0; } - if ((rule->arg.attach_srv.name && (!srv->use_ssl || !srv->sni_expr)) || - (!rule->arg.attach_srv.name && srv->use_ssl && srv->sni_expr)) { - memprintf(err, "attach-srv rule: connection will never be used; either specify name argument in conjunction with defined SSL SNI on targeted server or none of these"); - return 0; + if (rule->arg.attach_srv.name) { + if (!srv->pool_conn_name) { + memprintf(err, "attach-srv rule has a name argument while server '%s/%s' does not use pool-conn-name; either reconfigure the server or remove the name argument from this attach-srv rule", ist0(be_name), ist0(sv_name)); + return 0; + } + } else { + if (srv->pool_conn_name) { + memprintf(err, "attach-srv rule has no name argument while server '%s/%s' uses pool-conn-name; either add a name argument to the attach-srv rule or reconfigure the server", ist0(be_name), ist0(sv_name)); + return 0; + } } rule->arg.attach_srv.srv = srv; @@ -565,29 +640,56 @@ static enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg /* Parse a "set-mark" action. It takes the MARK value as argument. 
It returns * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error. */ -static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, struct proxy *px, - struct act_rule *rule, char **err) +static enum act_parse_ret tcp_parse_set_mark(const char **args, int *orig_arg, struct proxy *px, + struct act_rule *rule, char **err) { #if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE) + struct sample_expr *expr; char *endp; - unsigned int mark; + unsigned int where; + int cur_arg = *orig_arg; - if (!*args[*cur_arg]) { - memprintf(err, "expects exactly 1 argument (integer/hex value)"); + if (!*args[*orig_arg]) { + memprintf(err, "expects an argument"); return ACT_RET_PRS_ERR; } - mark = strtoul(args[*cur_arg], &endp, 0); - if (endp && *endp != '\0') { - memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp); - return ACT_RET_PRS_ERR; + + /* value may be either an unsigned integer or an expression */ + rule->arg.expr_int.expr = NULL; + rule->arg.expr_int.value = strtoul(args[*orig_arg], &endp, 0); + if (*endp == '\0') { + /* valid unsigned integer */ + (*orig_arg)++; } + else { + /* invalid unsigned integer, fallback to expr */ + expr = sample_parse_expr((char **)args, orig_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL); + if (!expr) + return ACT_RET_PRS_ERR; - (*cur_arg)++; + where = 0; + if (px->cap & PR_CAP_FE) + where |= SMP_VAL_FE_HRQ_HDR; + if (px->cap & PR_CAP_BE) + where |= SMP_VAL_BE_HRQ_HDR; + + if (!(expr->fetch->val & where)) { + memprintf(err, + "fetch method '%s' extracts information from '%s', none of which is available here", + args[cur_arg-1], sample_src_names(expr->fetch->use)); + free(expr); + return ACT_RET_PRS_ERR; + } + rule->arg.expr_int.expr = expr; + } /* Register processing function. 
*/ - rule->action_ptr = tcp_action_set_mark; + if (strcmp("set-bc-mark", args[cur_arg - 1]) == 0) + rule->action_ptr = tcp_action_set_bc_mark; + else + rule->action_ptr = tcp_action_set_fc_mark; // fc mark rule->action = ACT_CUSTOM; - rule->arg.act.p[0] = (void *)(uintptr_t)mark; + rule->release_ptr = release_expr_int_action; global.last_checks |= LSTCHK_NETADM; return ACT_RET_PRS_OK; #else @@ -600,29 +702,56 @@ static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, st /* Parse a "set-tos" action. It takes the TOS value as argument. It returns * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error. */ -static enum act_parse_ret tcp_parse_set_tos(const char **args, int *cur_arg, struct proxy *px, - struct act_rule *rule, char **err) +static enum act_parse_ret tcp_parse_set_tos(const char **args, int *orig_arg, struct proxy *px, + struct act_rule *rule, char **err) { #ifdef IP_TOS + struct sample_expr *expr; char *endp; - int tos; + unsigned int where; + int cur_arg = *orig_arg; - if (!*args[*cur_arg]) { - memprintf(err, "expects exactly 1 argument (integer/hex value)"); + if (!*args[*orig_arg]) { + memprintf(err, "expects an argument"); return ACT_RET_PRS_ERR; } - tos = strtol(args[*cur_arg], &endp, 0); - if (endp && *endp != '\0') { - memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp); - return ACT_RET_PRS_ERR; + + /* value may be either an integer or an expression */ + rule->arg.expr_int.expr = NULL; + rule->arg.expr_int.value = strtol(args[*orig_arg], &endp, 0); + if (*endp == '\0') { + /* valid integer */ + (*orig_arg)++; } + else { + /* invalid unsigned integer, fallback to expr */ + expr = sample_parse_expr((char **)args, orig_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL); + if (!expr) + return ACT_RET_PRS_ERR; - (*cur_arg)++; + where = 0; + if (px->cap & PR_CAP_FE) + where |= SMP_VAL_FE_HRQ_HDR; + if (px->cap & PR_CAP_BE) + where |= SMP_VAL_BE_HRQ_HDR; + + if 
(!(expr->fetch->val & where)) { + memprintf(err, + "fetch method '%s' extracts information from '%s', none of which is available here", + args[cur_arg-1], sample_src_names(expr->fetch->use)); + free(expr); + return ACT_RET_PRS_ERR; + } + rule->arg.expr_int.expr = expr; + } /* Register processing function. */ - rule->action_ptr = tcp_action_set_tos; + if (strcmp("set-bc-tos", args[cur_arg - 1]) == 0) + rule->action_ptr = tcp_action_set_bc_tos; + else + rule->action_ptr = tcp_action_set_fc_tos; // fc tos rule->action = ACT_CUSTOM; - rule->arg.act.p[0] = (void *)(uintptr_t)tos; + rule->release_ptr = release_expr_int_action; return ACT_RET_PRS_OK; #else memprintf(err, "not supported on this platform (IP_TOS undefined)"); @@ -672,10 +801,12 @@ static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *cur_arg, static struct action_kw_list tcp_req_conn_actions = {ILH, { { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -686,10 +817,12 @@ static struct action_kw_list tcp_req_sess_actions = {ILH, { { "attach-srv" , tcp_parse_attach_srv }, { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", 
tcp_parse_silent_drop }, { /* END */ } }}; @@ -697,12 +830,16 @@ static struct action_kw_list tcp_req_sess_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_actions); static struct action_kw_list tcp_req_cont_actions = {ILH, { - { "set-src", tcp_parse_set_src_dst }, - { "set-src-port", tcp_parse_set_src_dst }, + { "set-bc-mark", tcp_parse_set_mark }, + { "set-bc-tos", tcp_parse_set_tos }, { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-src", tcp_parse_set_src_dst }, + { "set-src-port", tcp_parse_set_src_dst }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -710,8 +847,10 @@ static struct action_kw_list tcp_req_cont_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions); static struct action_kw_list tcp_res_cont_actions = {ILH, { - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -719,12 +858,16 @@ static struct action_kw_list tcp_res_cont_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions); static struct action_kw_list http_req_actions = {ILH, { + { "set-bc-mark", tcp_parse_set_mark }, + { "set-bc-tos", tcp_parse_set_tos }, { "set-dst", tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", 
tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -732,8 +875,10 @@ static struct action_kw_list http_req_actions = {ILH, { INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions); static struct action_kw_list http_res_actions = {ILH, { - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; diff --git a/src/tcpcheck.c b/src/tcpcheck.c index d30ecb5..b4f9590 100644 --- a/src/tcpcheck.c +++ b/src/tcpcheck.c @@ -75,26 +75,13 @@ DECLARE_POOL(pool_head_tcpcheck_rule, "tcpcheck_rule", sizeof(struct tcpcheck_ru /**************************************************************************/ /*************** Init/deinit tcp-check rules and ruleset ******************/ /**************************************************************************/ -/* Releases memory allocated for a log-format string */ -static void free_tcpcheck_fmt(struct list *fmt) -{ - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } -} - /* Releases memory allocated for an HTTP header used in a tcp-check send rule */ void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr) { if (!hdr) return; - free_tcpcheck_fmt(&hdr->value); + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -131,28 +118,28 @@ void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool) break; case 
TCPCHK_SEND_STRING_LF: case TCPCHK_SEND_BINARY_LF: - free_tcpcheck_fmt(&rule->send.fmt); + lf_expr_deinit(&rule->send.fmt); break; case TCPCHK_SEND_HTTP: free(rule->send.http.meth.str.area); if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&rule->send.http.uri); else - free_tcpcheck_fmt(&rule->send.http.uri_fmt); + lf_expr_deinit(&rule->send.http.uri_fmt); istfree(&rule->send.http.vsn); free_tcpcheck_http_hdrs(&rule->send.http.hdrs); if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&rule->send.http.body); else - free_tcpcheck_fmt(&rule->send.http.body_fmt); + lf_expr_deinit(&rule->send.http.body_fmt); break; case TCPCHK_SEND_UNDEF: break; } break; case TCPCHK_ACT_EXPECT: - free_tcpcheck_fmt(&rule->expect.onerror_fmt); - free_tcpcheck_fmt(&rule->expect.onsuccess_fmt); + lf_expr_deinit(&rule->expect.onerror_fmt); + lf_expr_deinit(&rule->expect.onsuccess_fmt); release_sample_expr(rule->expect.status_expr); switch (rule->expect.type) { case TCPCHK_EXPECT_HTTP_STATUS: @@ -172,20 +159,20 @@ void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool) case TCPCHK_EXPECT_STRING_LF: case TCPCHK_EXPECT_BINARY_LF: case TCPCHK_EXPECT_HTTP_BODY_LF: - free_tcpcheck_fmt(&rule->expect.fmt); + lf_expr_deinit(&rule->expect.fmt); break; case TCPCHK_EXPECT_HTTP_HEADER: if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG) regex_free(rule->expect.hdr.name_re); else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) - free_tcpcheck_fmt(&rule->expect.hdr.name_fmt); + lf_expr_deinit(&rule->expect.hdr.name_fmt); else istfree(&rule->expect.hdr.name); if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG) regex_free(rule->expect.hdr.value_re); else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) - free_tcpcheck_fmt(&rule->expect.hdr.value_fmt); + lf_expr_deinit(&rule->expect.hdr.value_fmt); else if (!(rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE)) istfree(&rule->expect.hdr.value); break; @@ -434,7 +421,7 @@ static void 
tcpcheck_expect_onerror_message(struct buffer *msg, struct check *ch chunk_istcat(msg, info); goto comment; } - else if (!LIST_ISEMPTY(&rule->expect.onerror_fmt)) { + else if (!lf_expr_isempty(&rule->expect.onerror_fmt)) { msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onerror_fmt); goto comment; } @@ -529,7 +516,7 @@ static void tcpcheck_expect_onsuccess_message(struct buffer *msg, struct check * */ if (istlen(info)) chunk_istcat(msg, info); - if (!LIST_ISEMPTY(&rule->expect.onsuccess_fmt)) + if (!lf_expr_isempty(&rule->expect.onsuccess_fmt)) msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onsuccess_fmt); else if (check->type == PR_O2_TCPCHK_CHK && @@ -1697,7 +1684,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; case TCPCHK_EXPECT_HTTP_STATUS_REGEX: @@ -1705,7 +1692,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; @@ -1836,7 +1823,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp end_of_match: status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; } @@ -1863,7 +1850,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp goto wait_more_data; } status = ((status != HCHK_STATUS_UNKNOWN) ? 
status : HCHK_STATUS_L7RSP); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = ist("HTTP content check could not find a response body"); TRACE_ERROR("no response boduy found while expected", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check); goto error; @@ -1912,7 +1899,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = (inverse ? ist("HTTP check matched unwanted content") : ist("HTTP content check did not match")); @@ -2649,7 +2636,7 @@ struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy } case TCPCHK_SEND_STRING_LF: case TCPCHK_SEND_BINARY_LF: - LIST_INIT(&chk->send.fmt); + lf_expr_init(&chk->send.fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(data, px, &chk->send.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", data, *errmsg); @@ -2790,7 +2777,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct } if (uri) { if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) { - LIST_INIT(&chk->send.http.uri_fmt); + lf_expr_init(&chk->send.http.uri_fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(uri, px, &chk->send.http.uri_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", uri, *errmsg); @@ -2818,7 +2805,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct memprintf(errmsg, "out of memory"); goto error; } - LIST_INIT(&hdr->value); + lf_expr_init(&hdr->value); hdr->name = istdup(hdrs[i].n); if (!isttest(hdr->name)) { memprintf(errmsg, "out of memory"); @@ -2834,7 +2821,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct if (body) { if 
(chk->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) { - LIST_INIT(&chk->send.http.body_fmt); + lf_expr_init(&chk->send.http.body_fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(body, px, &chk->send.http.body_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", body, *errmsg); @@ -3301,8 +3288,8 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro goto error; } chk->action = TCPCHK_ACT_EXPECT; - LIST_INIT(&chk->expect.onerror_fmt); - LIST_INIT(&chk->expect.onsuccess_fmt); + lf_expr_init(&chk->expect.onerror_fmt); + lf_expr_init(&chk->expect.onsuccess_fmt); chk->comment = comment; comment = NULL; chk->expect.type = type; chk->expect.min_recv = min_recv; @@ -3395,7 +3382,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro case TCPCHK_EXPECT_STRING_LF: case TCPCHK_EXPECT_BINARY_LF: case TCPCHK_EXPECT_HTTP_BODY_LF: - LIST_INIT(&chk->expect.fmt); + lf_expr_init(&chk->expect.fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(pattern, px, &chk->expect.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", pattern, *errmsg); @@ -3415,7 +3402,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro } else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) { px->conf.args.ctx = ARGC_SRV; - LIST_INIT(&chk->expect.hdr.name_fmt); + lf_expr_init(&chk->expect.hdr.name_fmt); if (!parse_logformat_string(npat, px, &chk->expect.hdr.name_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg); goto error; @@ -3445,7 +3432,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro } else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) { px->conf.args.ctx = ARGC_SRV; - LIST_INIT(&chk->expect.hdr.value_fmt); + lf_expr_init(&chk->expect.hdr.value_fmt); if (!parse_logformat_string(vpat, 
px, &chk->expect.hdr.value_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg); goto error; @@ -3497,7 +3484,6 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro */ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpcheck_rule *new) { - struct logformat_node *lf, *lfb; struct tcpcheck_http_hdr *hdr, *bhdr; @@ -3513,22 +3499,19 @@ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpchec if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&old->send.http.uri); else - free_tcpcheck_fmt(&old->send.http.uri_fmt); + lf_expr_deinit(&old->send.http.uri_fmt); old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT; old->send.http.uri = new->send.http.uri; new->send.http.uri = IST_NULL; } - else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !LIST_ISEMPTY(&new->send.http.uri_fmt)) { + else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !lf_expr_isempty(&new->send.http.uri_fmt)) { if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&old->send.http.uri); else - free_tcpcheck_fmt(&old->send.http.uri_fmt); + lf_expr_deinit(&old->send.http.uri_fmt); old->send.http.flags |= TCPCHK_SND_HTTP_FL_URI_FMT; - LIST_INIT(&old->send.http.uri_fmt); - list_for_each_entry_safe(lf, lfb, &new->send.http.uri_fmt, list) { - LIST_DELETE(&lf->list); - LIST_APPEND(&old->send.http.uri_fmt, &lf->list); - } + lf_expr_init(&old->send.http.uri_fmt); + lf_expr_xfer(&new->send.http.uri_fmt, &old->send.http.uri_fmt); } if (isttest(new->send.http.vsn)) { @@ -3549,22 +3532,19 @@ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpchec if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&old->send.http.body); else - free_tcpcheck_fmt(&old->send.http.body_fmt); + lf_expr_deinit(&old->send.http.body_fmt); old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT; old->send.http.body = 
new->send.http.body; new->send.http.body = IST_NULL; } - else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !LIST_ISEMPTY(&new->send.http.body_fmt)) { + else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !lf_expr_isempty(&new->send.http.body_fmt)) { if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&old->send.http.body); else - free_tcpcheck_fmt(&old->send.http.body_fmt); + lf_expr_deinit(&old->send.http.body_fmt); old->send.http.flags |= TCPCHK_SND_HTTP_FL_BODY_FMT; - LIST_INIT(&old->send.http.body_fmt); - list_for_each_entry_safe(lf, lfb, &new->send.http.body_fmt, list) { - LIST_DELETE(&lf->list); - LIST_APPEND(&old->send.http.body_fmt, &lf->list); - } + lf_expr_init(&old->send.http.body_fmt); + lf_expr_xfer(&new->send.http.body_fmt, &old->send.http.body_fmt); } } @@ -3815,8 +3795,8 @@ int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str) expect = &tcpcheck->expect; expect->type = TCPCHK_EXPECT_STRING; - LIST_INIT(&expect->onerror_fmt); - LIST_INIT(&expect->onsuccess_fmt); + lf_expr_init(&expect->onerror_fmt); + lf_expr_init(&expect->onsuccess_fmt); expect->ok_status = HCHK_STATUS_L7OKD; expect->err_status = HCHK_STATUS_L7RSP; expect->tout_status = HCHK_STATUS_L7TOUT; @@ -3877,9 +3857,9 @@ int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *str } /* Parses the "tcp-check" proxy keyword */ -static int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **errmsg) +int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **errmsg) { struct tcpcheck_ruleset *rs = NULL; struct tcpcheck_rule *chk = NULL; diff --git a/src/thread.c b/src/thread.c index ab4342d..655e199 100644 --- a/src/thread.c +++ b/src/thread.c @@ -1709,6 +1709,35 @@ static int cfg_parse_nbthread(char **args, int section_type, struct proxy *curpx return 0; 
} +/* Parse the "thread-hard-limit" global directive, which takes an integer + * argument that contains the desired maximum number of threads that will + * not be crossed. + */ +static int cfg_parse_thread_hard_limit(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + long nbthread; + char *errptr; + + if (too_many_args(1, args, err, NULL)) + return -1; + + nbthread = strtol(args[1], &errptr, 10); + if (!*args[1] || *errptr) { + memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]); + return -1; + } + + if (nbthread < 1 || nbthread > MAX_THREADS) { + memprintf(err, "'%s' value must be at least 1 (was %ld)", args[0], nbthread); + return -1; + } + + global.thread_limit = nbthread; + return 0; +} + /* Parse the "thread-group" global directive, which takes an integer argument * that designates a thread group, and a list of threads to put into that group. */ @@ -1855,6 +1884,7 @@ static int cfg_parse_thread_groups(char **args, int section_type, struct proxy * /* config keyword parsers */ static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "thread-hard-limit", cfg_parse_thread_hard_limit, 0 }, { CFG_GLOBAL, "nbthread", cfg_parse_nbthread, 0 }, { CFG_GLOBAL, "thread-group", cfg_parse_thread_group, 0 }, { CFG_GLOBAL, "thread-groups", cfg_parse_thread_groups, 0 }, diff --git a/src/tools.c b/src/tools.c index e1ba241..7608e7e 100644 --- a/src/tools.c +++ b/src/tools.c @@ -17,9 +17,14 @@ #endif #if defined(__FreeBSD__) +#include <sys/param.h> +#if __FreeBSD_version < 1300058 #include <elf.h> #include <dlfcn.h> extern void *__elf_aux_vector; +#else +#include <sys/auxv.h> +#endif #endif #if defined(__NetBSD__) @@ -36,6 +41,7 @@ extern void *__elf_aux_vector; #include <string.h> #include <time.h> #include <unistd.h> +#include <sys/mman.h> #include <sys/socket.h> #include <sys/stat.h> #include <sys/types.h> @@ -47,6 +53,10 @@ extern void *__elf_aux_vector; 
#include <sys/auxv.h> #endif +#if defined(USE_PRCTL) +#include <sys/prctl.h> +#endif + #include <import/eb32sctree.h> #include <import/eb32tree.h> #include <import/ebmbtree.h> @@ -1964,11 +1974,11 @@ int addr_is_local(const struct netns_entry *ns, * <map> with the hexadecimal representation of their ASCII-code (2 digits) * prefixed by <escape>, and will store the result between <start> (included) * and <stop> (excluded), and will always terminate the string with a '\0' - * before <stop>. The position of the '\0' is returned if the conversion - * completes. If bytes are missing between <start> and <stop>, then the - * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0' - * cannot even be stored so we return <start> without writing the 0. + * before <stop>. If bytes are missing between <start> and <stop>, then the + * conversion will be incomplete and truncated. * The input string must also be zero-terminated. + * + * Return the address of the \0 character, or NULL on error */ const char hextab[16] = "0123456789ABCDEF"; char *encode_string(char *start, char *stop, @@ -1990,8 +2000,9 @@ char *encode_string(char *start, char *stop, string++; } *start = '\0'; + return start; } - return start; + return NULL; } /* @@ -2020,8 +2031,9 @@ char *encode_chunk(char *start, char *stop, str++; } *start = '\0'; + return start; } - return start; + return NULL; } /* @@ -2030,8 +2042,9 @@ char *encode_chunk(char *start, char *stop, * is reached or NULL-byte is encountered. The result will * be stored between <start> (included) and <stop> (excluded). This * function will always try to terminate the resulting string with a '\0' - * before <stop>, and will return its position if the conversion - * completes. + * before <stop>. 
+ * + * Return the address of the \0 character, or NULL on error */ char *escape_string(char *start, char *stop, const char escape, const long *map, @@ -2051,10 +2064,169 @@ char *escape_string(char *start, char *stop, string++; } *start = '\0'; + return start; + } + return NULL; +} + +/* CBOR helper to encode an uint64 value with prefix (3bits MAJOR type) + * according to RFC8949 + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_uint64_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, uint64_t value, + uint8_t prefix) +{ + int nb_bytes = 0; + + /* + * For encoding logic, see: + * https://www.rfc-editor.org/rfc/rfc8949.html#name-specification-of-the-cbor-e + */ + if (value < 24) { + /* argument is the value itself */ + prefix |= value; + } + else { + if (value <= 0xFFU) { + /* 1-byte */ + nb_bytes = 1; + prefix |= 24; // 0x18 + } + else if (value <= 0xFFFFU) { + /* 2 bytes */ + nb_bytes = 2; + prefix |= 25; // 0x19 + } + else if (value <= 0xFFFFFFFFU) { + /* 4 bytes */ + nb_bytes = 4; + prefix |= 26; // 0x1A + } + else { + /* 8 bytes */ + nb_bytes = 8; + prefix |= 27; // 0x1B + } + } + + start = ctx->e_fct_byte(ctx, start, stop, prefix); + if (start == NULL) + return NULL; + + /* encode 1 byte at a time from higher bits to lower bits */ + while (nb_bytes) { + uint8_t cur_byte = (value >> ((nb_bytes - 1) * 8)) & 0xFFU; + + start = ctx->e_fct_byte(ctx, start, stop, cur_byte); + if (start == NULL) + return NULL; + + nb_bytes--; + } + + return start; +} + +/* CBOR helper to encode an int64 value according to RFC8949 + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. 
The function cannot write past <stop> + */ +char *cbor_encode_int64(struct cbor_encode_ctx *ctx, + char *start, char *stop, int64_t value) +{ + uint64_t absolute_value = llabs(value); + int cbor_prefix; + + /* + * For encoding logic, see: + * https://www.rfc-editor.org/rfc/rfc8949.html#name-specification-of-the-cbor-e + */ + if (value >= 0) + cbor_prefix = 0x00; // unsigned int + else { + cbor_prefix = 0x20; // negative int + /* N-1 for negative int */ + absolute_value -= 1; + } + return cbor_encode_uint64_prefix(ctx, start, stop, + absolute_value, cbor_prefix); +} + +/* CBOR helper to encode a <prefix> string chunk according to RFC8949 + * + * if <bytes> is NULL, then only the <prefix> (with length) will be + * emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_bytes_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len, + uint8_t prefix) +{ + + size_t it = 0; + + /* write prefix (with text length as argument) */ + start = cbor_encode_uint64_prefix(ctx, start, stop, + len, prefix); + if (start == NULL) + return NULL; + + /* write actual bytes if provided */ + while (bytes && it < len) { + start = ctx->e_fct_byte(ctx, start, stop, bytes[it]); + if (start == NULL) + return NULL; + it++; } return start; } +/* CBOR helper to encode a text chunk according to RFC8949 + * + * if <text> is NULL, then only the text prefix (with length) will be emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. 
The function cannot write past <stop> + */ +char *cbor_encode_text(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *text, size_t len) +{ + return cbor_encode_bytes_prefix(ctx, start, stop, text, len, 0x60); +} + +/* CBOR helper to encode a byte string chunk according to RFC8949 + * + * if <bytes> is NULL, then only the byte string prefix (with length) will be + * emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_bytes(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len) +{ + return cbor_encode_bytes_prefix(ctx, start, stop, bytes, len, 0x40); +} + /* Check a string for using it in a CSV output format. If the string contains * one of the following four char <">, <,>, CR or LF, the string is * encapsulated between <"> and the <"> are escaped by a <""> sequence. @@ -4900,6 +5072,58 @@ void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, } } +/* Dumps the 64 bytes around <addr> at the end of <output> with symbols + * decoding. An optional special pointer may be recognized (special), in + * which case its type (spec_type) and name (spec_name) will be reported. + * This is convenient for pool names but could be used for list heads or + * anything in that vein. 
+*/ +void dump_area_with_syms(struct buffer *output, const void *base, const void *addr, + const void *special, const char *spec_type, const char *spec_name) +{ + const char *start, *end, *p; + const void *tag; + + chunk_appendf(output, "Contents around address %p+%lu=%p:\n", base, (ulong)(addr - base), addr); + + /* dump in word-sized blocks */ + start = (const void *)(((uintptr_t)addr - 32) & -sizeof(void*)); + end = (const void *)(((uintptr_t)addr + 32 + sizeof(void*) - 1) & -sizeof(void*)); + + while (start < end) { + dump_addr_and_bytes(output, " ", start, sizeof(void*)); + chunk_strcat(output, " ["); + for (p = start; p < start + sizeof(void*); p++) { + if (!may_access(p)) + chunk_strcat(output, "*"); + else if (isprint((unsigned char)*p)) + chunk_appendf(output, "%c", *p); + else + chunk_strcat(output, "."); + } + + if (may_access(start)) + tag = *(const void **)start; + else + tag = NULL; + + if (special && tag == special) { + /* the pool can often be there so let's detect it */ + chunk_appendf(output, "] [%s:%s", spec_type, spec_name); + } + else if (tag) { + /* print pointers that resolve to a symbol */ + size_t back_data = output->data; + chunk_strcat(output, "] ["); + if (!resolve_sym_name(output, NULL, tag)) + output->data = back_data; + } + + chunk_strcat(output, "]\n"); + start = p; + } +} + /* print a line of text buffer (limited to 70 bytes) to <out>. The format is : * <2 spaces> <offset=5 digits> <space or plus> <space> <70 chars max> <\n> * which is 60 chars per line. 
Non-printable chars \t, \n, \r and \e are @@ -5018,6 +5242,7 @@ const char *get_exec_path() if (execfn && execfn != ENOENT) ret = (const char *)execfn; #elif defined(__FreeBSD__) +#if __FreeBSD_version < 1300058 Elf_Auxinfo *auxv; for (auxv = __elf_aux_vector; auxv->a_type != AT_NULL; ++auxv) { if (auxv->a_type == AT_EXECPATH) { @@ -5025,6 +5250,14 @@ const char *get_exec_path() break; } } +#else + static char execpath[MAXPATHLEN]; + + if (execpath[0] == '\0') + elf_aux_info(AT_EXECPATH, execpath, MAXPATHLEN); + if (execpath[0] != '\0') + ret = execpath; +#endif #elif defined(__NetBSD__) AuxInfo *auxv; for (auxv = _dlauxinfo(); auxv->a_type != AT_NULL; ++auxv) { @@ -5511,10 +5744,10 @@ void ha_random_jump96(uint32_t dist) } } -/* Generates an RFC4122 UUID into chunk <output> which must be at least 37 - * bytes large. +/* Generates an RFC 9562 version 4 UUID into chunk + * <output> which must be at least 37 bytes large. */ -void ha_generate_uuid(struct buffer *output) +void ha_generate_uuid_v4(struct buffer *output) { uint32_t rnd[4]; uint64_t last; @@ -5535,6 +5768,31 @@ void ha_generate_uuid(struct buffer *output) (long long)((rnd[2] >> 14u) | ((uint64_t) rnd[3] << 18u)) & 0xFFFFFFFFFFFFull); } +/* Generates an RFC 9562 version 7 UUID into chunk + * <output> which must be at least 37 bytes large. + */ +void ha_generate_uuid_v7(struct buffer *output) +{ + uint32_t rnd[3]; + uint64_t last; + uint64_t time; + + time = (date.tv_sec * 1000) + (date.tv_usec / 1000); + last = ha_random64(); + rnd[0] = last; + rnd[1] = last >> 32; + + last = ha_random64(); + rnd[2] = last; + + chunk_printf(output, "%8.8x-%4.4x-%4.4x-%4.4x-%12.12llx", + (uint)(time >> 16u), + (uint)(time & 0xFFFF), + ((rnd[0] >> 16u) & 0xFFF) | 0x7000, // highest 4 bits indicate the uuid version + (rnd[1] & 0x3FFF) | 0x8000, // the highest 2 bits indicate the UUID variant (10), + (long long)((rnd[1] >> 14u) | ((uint64_t) rnd[2] << 18u)) & 0xFFFFFFFFFFFFull); +} + /* only used by parse_line() below. 
It supports writing in place provided that * <in> is updated to the next location before calling it. In that case, the @@ -6206,6 +6464,94 @@ int openssl_compare_current_name(const char *name) return 1; } +/* prctl/PR_SET_VMA wrapper to easily give a name to virtual memory areas, + * knowing their address and size. + * + * It is only intended for use with memory allocated using mmap (private or + * shared anonymous maps) or malloc (provided that <size> is at least one page + * large), which is memory that may be released using munmap(). For memory + * allocated using malloc(), no naming will be attempted if the vma is less + * than one page large, because naming is only relevant for large memory + * blocks. For instance, glibc/malloc() will directly use mmap() once + * MMAP_THRESHOLD is reached (defaults to 128K), and will try to use the + * heap as much as possible below that. + * + * <type> and <name> are mandatory + * <id> is optional, if != ~0, will be used to append an id after the name + * in order to differentiate 2 entries set using the same <type> and <name> + * + * The function does nothing if naming API is not available, and naming errors + * are ignored. 
+ */ +void vma_set_name_id(void *addr, size_t size, const char *type, const char *name, unsigned int id) +{ + long pagesize = sysconf(_SC_PAGESIZE); + void *aligned_addr; + __maybe_unused size_t aligned_size; + + BUG_ON(!type || !name); + + /* prctl/PR_SET/VMA expects the start of an aligned memory address, but + * user may have provided address returned by malloc() which may not be + * aligned nor point to the beginning of the map + */ + aligned_addr = (void *)((uintptr_t)addr & -4096); + aligned_size = (((addr + size) - aligned_addr) + 4095) & -4096; + + if (aligned_addr != addr) { + /* provided pointer likely comes from malloc(), at least it + * doesn't come from mmap() which only returns aligned addresses + */ + if (size < pagesize) + return; + } +#if defined(USE_PRCTL) && defined(PR_SET_VMA) + { + /* + * From Linux 5.17 (and if the `CONFIG_ANON_VMA_NAME` kernel config is set)`, + * anonymous regions can be named. + * We intentionally ignore errors as it should not jeopardize the memory context + * mapping whatsoever (e.g. older kernels). + * + * The naming can take up to 79 characters, accepting valid ASCII values + * except [, ], \, $ and '. + * As a result, when looking for /proc/<pid>/maps, we can see the anonymous range + * as follow : + * `7364c4fff000-736508000000 rw-s 00000000 00:01 3540 [anon_shmem:scope:name{-id}]` + * (MAP_SHARED) + * `7364c4fff000-736508000000 rw-s 00000000 00:01 3540 [anon:scope:name{-id}]` + * (MAP_PRIVATE) + */ + char fullname[80]; + int rn; + + if (id != ~0) + rn = snprintf(fullname, sizeof(fullname), "%s:%s-%u", type, name, id); + else + rn = snprintf(fullname, sizeof(fullname), "%s:%s", type, name); + + if (rn >= 0) { + /* Give a name to the map by setting PR_SET_VMA_ANON_NAME attribute + * using prctl/PR_SET_VMA combination. 
+ * + * note from 'man prctl': + * assigning an attribute to a virtual memory area might prevent it + * from being merged with adjacent virtual memory areas due to the + * difference in that attribute's value. + */ + (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, + aligned_addr, aligned_size, fullname); + } + } +#endif +} + +/* wrapper for vma_set_name_id() but without id */ +void vma_set_name(void *addr, size_t size, const char *type, const char *name) +{ + vma_set_name_id(addr, size, type, name, ~0); +} + #if defined(RTLD_DEFAULT) || defined(RTLD_NEXT) /* redefine dlopen() so that we can detect unexpected replacement of some * critical symbols, typically init/alloc/free functions coming from alternate diff --git a/src/trace.c b/src/trace.c index a233c0d..fcf557b 100644 --- a/src/trace.c +++ b/src/trace.c @@ -129,7 +129,7 @@ int __trace_enabled(enum trace_level level, uint64_t mask, struct trace_source * if (!sess && strm) sess = strm->sess; - else if (!sess && conn && LIST_INLIST(&conn->session_list)) + else if (!sess && conn && LIST_INLIST(&conn->sess_el)) sess = conn->owner; else if (!sess && check) sess = check->sess; @@ -376,15 +376,16 @@ static int trace_source_parse_verbosity(struct trace_source *src, const struct name_desc *nd; int ret; + /* Only "quiet" is defined for all sources. Other identifiers are + * specific to trace source. + */ if (strcmp(verbosity, "quiet") == 0) { ret = 0; goto end; } - /* Only "quiet" is defined for all sources. Other identifiers are - * specific to trace source. 
- */ - BUG_ON(!src); + if (!src) + return -1; if (!src->decoding || !src->decoding[0].name) { if (strcmp(verbosity, "default") != 0) @@ -566,10 +567,16 @@ static int trace_parse_statement(char **args, char **msg) } else if (strcmp(args[2], "level") == 0) { const char *name = args[3]; - int level; + int level = -1; - if (!*name) { - chunk_printf(&trash, "Supported trace levels for source %s:\n", src->name.ptr); + if (*name) + level = trace_parse_level(name); + + if (level < 0) { + chunk_reset(&trash); + if (*name) + chunk_appendf(&trash, "No such trace level '%s'. ", name); + chunk_appendf(&trash, "Supported trace levels for source %s:\n", src->name.ptr); chunk_appendf(&trash, " %c error : report errors\n", src->level == TRACE_LEVEL_ERROR ? '*' : ' '); chunk_appendf(&trash, " %c user : also information useful to the end user\n", @@ -584,13 +591,7 @@ static int trace_parse_statement(char **args, char **msg) src->level == TRACE_LEVEL_DEVELOPER ? '*' : ' '); trash.area[trash.data] = 0; *msg = strdup(trash.area); - return LOG_WARNING; - } - - level = trace_parse_level(name); - if (level < 0) { - memprintf(msg, "No such trace level '%s'", name); - return LOG_ERR; + return *name ? LOG_ERR : LOG_WARNING; } HA_ATOMIC_STORE(&src->level, level); @@ -734,10 +735,16 @@ static int trace_parse_statement(char **args, char **msg) else if (strcmp(args[2], "verbosity") == 0) { const char *name = args[3]; const struct name_desc *nd; - int verbosity; + int verbosity = -1; - if (!*name) { - chunk_printf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr); + if (*name) + verbosity = trace_source_parse_verbosity(src, name); + + if (verbosity < 0) { + chunk_reset(&trash); + if (*name) + chunk_appendf(&trash, "No such verbosity level '%s'. ", name); + chunk_appendf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr); chunk_appendf(&trash, " %c quiet : only report basic information with no decoding\n", src->verbosity == 0 ? 
'*' : ' '); if (!src->decoding || !src->decoding[0].name) { @@ -751,13 +758,7 @@ static int trace_parse_statement(char **args, char **msg) } trash.area[trash.data] = 0; *msg = strdup(trash.area); - return LOG_WARNING; - } - - verbosity = trace_source_parse_verbosity(src, name); - if (verbosity < 0) { - memprintf(msg, "No such verbosity level '%s'", name); - return LOG_ERR; + return *name ? LOG_ERR : LOG_WARNING; } HA_ATOMIC_STORE(&src->verbosity, verbosity); @@ -837,7 +838,7 @@ int trace_parse_cmd(char *arg, char **errmsg) if (strlen(field)) { level = trace_parse_level(field); if (level < 0) { - memprintf(errmsg, "no such level '%s'", field); + memprintf(errmsg, "no such trace level '%s', available levels are 'error', 'user', 'proto', 'state', 'data', and 'developer'", field); return 1; } } @@ -848,18 +849,23 @@ int trace_parse_cmd(char *arg, char **errmsg) /* 3. verbosity */ field = str; if (strchr(field, ':')) { - memprintf(errmsg, "too many double-colon separator"); - return 1; - } - - if (!src && strcmp(field, "quiet") != 0) { - memprintf(errmsg, "trace source must be specified for verbosity other than 'quiet'"); + memprintf(errmsg, "too many double-colon separators in trace definition"); return 1; } verbosity = trace_source_parse_verbosity(src, field); if (verbosity < 0) { - memprintf(errmsg, "no such verbosity '%s' for source '%s'", field, name); + const struct name_desc *nd; + + if (!src) { + memprintf(errmsg, "trace source must be specified for verbosity other than 'quiet'"); + } + else { + memprintf(errmsg, "no such trace verbosity '%s' for source '%s', available verbosities for this source are: 'quiet'", field, name); + for (nd = src->decoding; nd->name && nd->desc; nd++) + memprintf(errmsg, "%s, %s'%s'", *errmsg, (nd + 1)->name ? 
"" : "and ", nd->name); + } + return 1; } diff --git a/src/uri_auth.c b/src/uri_auth.c index db7e6c6..979b327 100644 --- a/src/uri_auth.c +++ b/src/uri_auth.c @@ -110,7 +110,7 @@ struct uri_auth *stats_set_realm(struct uri_auth **root, char *realm) } /* - * Returns a default uri_auth with STAT_SHNODE flag enabled and + * Returns a default uri_auth with STAT_F_SHNODE flag enabled and * <node> set as the name if it is not empty. * Uses the pointer provided if not NULL and not initialized. */ @@ -128,7 +128,7 @@ struct uri_auth *stats_set_node(struct uri_auth **root, char *name) if ((u = stats_check_init_uri_auth(root)) == NULL) goto out_u; - if (!stats_set_flag(root, STAT_SHNODE)) + if (!stats_set_flag(root, STAT_F_SHNODE)) goto out_u; if (node_copy) { @@ -145,7 +145,7 @@ struct uri_auth *stats_set_node(struct uri_auth **root, char *name) } /* - * Returns a default uri_auth with STAT_SHDESC flag enabled and + * Returns a default uri_auth with STAT_F_SHDESC flag enabled and * <description> set as the desc if it is not empty. * Uses the pointer provided if not NULL and not initialized. 
*/ @@ -163,7 +163,7 @@ struct uri_auth *stats_set_desc(struct uri_auth **root, char *desc) if ((u = stats_check_init_uri_auth(root)) == NULL) goto out_u; - if (!stats_set_flag(root, STAT_SHDESC)) + if (!stats_set_flag(root, STAT_F_SHDESC)) goto out_u; if (desc_copy) { @@ -328,16 +328,16 @@ static int smp_fetch_var(const struct arg *args, struct sample *smp, const char */ static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struct var *var, int var_type) { - if (var_type == SMP_T_STR || var_type == SMP_T_BIN) { - ha_free(&var->data.u.str.area); - var_accounting_diff(vars, smp->sess, smp->strm, - -var->data.u.str.data); - } - else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) { - ha_free(&var->data.u.meth.str.area); - var_accounting_diff(vars, smp->sess, smp->strm, - -var->data.u.meth.str.data); - } + if (var_type == SMP_T_STR || var_type == SMP_T_BIN) { + ha_free(&var->data.u.str.area); + var_accounting_diff(vars, smp->sess, smp->strm, + -var->data.u.str.data); + } + else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) { + ha_free(&var->data.u.meth.str.area); + var_accounting_diff(vars, smp->sess, smp->strm, + -var->data.u.meth.str.data); + } } /* This function tries to create a variable whose name hash is <name_hash> in @@ -363,7 +363,7 @@ static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struc * * It returns 0 on failure, non-zero on success. */ -static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags) +int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags) { struct vars *vars; struct var *var; @@ -515,7 +515,7 @@ static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp * session and stream found in <smp>. Note that stream may be null for * SCOPE_SESS. Returns 0 if the scope was not found otherwise 1. 
*/ -static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp) +int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp) { struct vars *vars; struct var *var; @@ -787,7 +787,7 @@ static enum act_return action_store(struct act_rule *rule, struct proxy *px, /* Process the expression. */ memset(&smp, 0, sizeof(smp)); - if (!LIST_ISEMPTY(&rule->arg.vars.fmt)) { + if (!lf_expr_isempty(&rule->arg.vars.fmt)) { /* a format-string is used */ fmtstr = alloc_trash_chunk(); @@ -838,14 +838,7 @@ static enum act_return action_clear(struct act_rule *rule, struct proxy *px, static void release_store_rule(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, &rule->arg.vars.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rule->arg.vars.fmt); release_sample_expr(rule->arg.vars.expr); } @@ -949,7 +942,7 @@ static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy condition = istsplit(&var, ','); } - LIST_INIT(&rule->arg.vars.fmt); + lf_expr_init(&rule->arg.vars.fmt); if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err)) return ACT_RET_PRS_ERR; @@ -1029,11 +1022,6 @@ static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy return ACT_RET_PRS_ERR; (*arg)++; - - /* for late error reporting */ - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; } else { /* set-var */ rule->arg.vars.expr = sample_parse_expr((char **)args, arg, px->conf.args.file, @@ -1072,6 +1060,7 @@ static int vars_parse_global_set_var(char **args, int section_type, struct proxy struct proxy px = { .id = "CFG", .conf.args = { .file = file, .line = line, }, + .flags = PR_FL_CHECKED, }; struct act_rule rule = { .arg.vars.scope = SCOPE_PROC, @@ -1192,6 +1181,7 @@ static int vars_parse_cli_set_var(char **args, 
char *payload, struct appctx *app struct proxy px = { .id = "CLI", .conf.args = { .file = "CLI", .line = 0, }, + .flags = PR_FL_CHECKED, }; struct act_rule rule = { .arg.vars.scope = SCOPE_PROC, diff --git a/src/xprt_quic.c b/src/xprt_quic.c index eda113c..b83b634 100644 --- a/src/xprt_quic.c +++ b/src/xprt_quic.c @@ -140,6 +140,13 @@ static int qc_xprt_start(struct connection *conn, void *ctx) /* mux-quic can now be considered ready. */ qc->mux_state = QC_MUX_READY; + /* Schedule quic-conn to ensure post handshake frames are emitted. This + * is not done for 0-RTT as xprt->start happens before handshake + * completion. + */ + if (qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) + tasklet_wakeup(qc->wait_event.tasklet); + ret = 1; out: TRACE_LEAVE(QUIC_EV_CONN_NEW, qc); |