diff options
Diffstat (limited to '')
-rw-r--r-- | CHANGELOG | 23 | ||||
-rw-r--r-- | SUBVERS | 2 | ||||
-rw-r--r-- | VERDATE | 4 | ||||
-rw-r--r-- | VERSION | 2 | ||||
-rw-r--r-- | doc/configuration.txt | 69 | ||||
-rw-r--r-- | doc/management.txt | 21 | ||||
-rw-r--r-- | src/log.c | 21 | ||||
-rw-r--r-- | src/mux_h1.c | 30 | ||||
-rw-r--r-- | src/proxy.c | 165 | ||||
-rw-r--r-- | src/quic_tx.c | 77 | ||||
-rw-r--r-- | src/resolvers.c | 6 |
11 files changed, 237 insertions, 183 deletions
@@ -1,6 +1,29 @@ ChangeLog : =========== +2024/06/14 : 3.0.2 + - MINOR: log: fix "http-send-name-header" ignore warning message + - BUG/MINOR: proxy: fix server_id_hdr_name leak on deinit() + - BUG/MINOR: proxy: fix log_tag leak on deinit() + - BUG/MINOR: proxy: fix email-alert leak on deinit() + - BUG/MINOR: proxy: fix check_{command,path} leak on deinit() + - BUG/MINOR: proxy: fix dyncookie_key leak on deinit() + - BUG/MINOR: proxy: fix source interface and usesrc leaks on deinit() + - BUG/MINOR: proxy: fix header_unique_id leak on deinit() + - BUG/MEDIUM: log: fix lf_expr_postcheck() behavior with default section + - DOC: config: move "hash-key" from proxy to server options + - DOC: config: add missing section hint for "guid" proxy keyword + - DOC: config: add missing context hint for new server and proxy keywords + - BUG/MINOR: promex: Skip resolvers metrics when there is no resolver section + - MINOR: proxy: add proxy_free_common() helper function + - BUG/MEDIUM: proxy: fix UAF with {tcp,http}checks logformat expressions + - CLEANUP: log/proxy: fix comment in proxy_free_common() + - BUG/MAJOR: mux-h1: Prevent any UAF on H1 connection after draining a request + - BUG/MINOR: quic: fix padding of INITIAL packets + - DOC/MINOR: management: add missed -dR and -dv options + - DOC/MINOR: management: add -dZ option + - DOC: management: rename show stats domain cli "dns" to "resolvers" + 2024/06/10 : 3.0.1 - BUG/MINOR: cfgparse: remove the correct option on httpcheck send-state warning - BUG/MINOR: tcpcheck: report correct error in tcp-check rule parser @@ -1,2 +1,2 @@ --471a1b2 +-a45a8e6 @@ -1,2 +1,2 @@ -2024-06-10 16:15:30 +0200 -2024/06/10 +2024-06-14 15:00:35 +0200 +2024/06/14 @@ -1 +1 @@ -3.0.1 +3.0.2 diff --git a/doc/configuration.txt b/doc/configuration.txt index 370717f..6b1c55d 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3,7 +3,7 @@ Configuration Manual ---------------------- version 3.0 - 2024/06/10 + 2024/06/14 This document covers the 
configuration language as implemented in the version @@ -5190,7 +5190,6 @@ filter - X X X fullconn X - X X guid - X X X hash-balance-factor X - X X -hash-key X - X X hash-type X - X X http-after-response X (!) X X X http-check comment X - X X @@ -6954,11 +6953,18 @@ fullconn <conns> guid <string> - Specify a case-sensitive global unique ID for this proxy. This must be unique - across all haproxy configuration on every object types. Format is left - unspecified to allow the user to select its naming policy. The only - restriction is its length which cannot be greater than 127 characters. All - alphanumerical values and '.', ':', '-' and '_' characters are valid. + Specify a case-sensitive global unique ID for this proxy. + + May be used in the following contexts: tcp, http, log + + May be used in sections : defaults | frontend | listen | backend + no | yes | yes | yes + + <string> must be unique across all haproxy configuration on every object + types. Format is left unspecified to allow the user to select its naming + policy. The only restriction is its length which cannot be greater than + 127 characters. All alphanumerical values and '.', ':', '-' and '_' + characters are valid. hash-balance-factor <factor> @@ -6994,29 +7000,6 @@ hash-balance-factor <factor> See also : "balance" and "hash-type". -hash-key <key> - Specify how "hash-type consistent" node keys are computed - - Arguments : - <key> <key> may be one of the following : - - id The node keys will be derived from the server's numeric - identifier as set from "id" or which defaults to its position - in the server list. - - addr The node keys will be derived from the server's address, when - available, or else fall back on "id". - - addr-port The node keys will be derived from the server's address and - port, when available, or else fall back on "id". - - The "addr" and "addr-port" options may be useful in scenarios where multiple - HAProxy processes are balancing traffic to the same set of servers. 
If the - server order of each process is different (because, for example, DNS records - were resolved in different orders) then this will allow each independent - HAProxy processes to agree on routing decisions. - - hash-type <method> <function> <modifier> Specify a method to use for mapping hashes to servers @@ -17174,10 +17157,36 @@ force-tlsv13 "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". guid <string> + May be used in the following contexts: tcp, http, log + Specify a case-sensitive global unique ID for this server. This must be unique across all haproxy configuration on every object types. See "guid" proxy keyword description for more information on its format. +hash-key <key> + May be used in the following contexts: tcp, http, log + + Specify how "hash-type consistent" node keys are computed + + Arguments : + <key> <key> may be one of the following : + + id The node keys will be derived from the server's numeric + identifier as set from "id" or which defaults to its position + in the server list. + + addr The node keys will be derived from the server's address, when + available, or else fall back on "id". + + addr-port The node keys will be derived from the server's address and + port, when available, or else fall back on "id". + + The "addr" and "addr-port" options may be useful in scenarios where multiple + HAProxy processes are balancing traffic to the same set of servers. If the + server order of each process is different (because, for example, DNS records + were resolved in different orders) then this will allow each independent + HAProxy processes to agree on routing decisions. + id <value> May be used in the following contexts: tcp, http, log diff --git a/doc/management.txt b/doc/management.txt index d036018..58ac958 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -360,6 +360,11 @@ list of options is : the byte's value to -dM but using this option allows to disable/enable use of a previously set value. 
+ -dR : disable SO_REUSEPORT socket option on listening ports. It is equivalent + to the "global" section's "noreuseport" keyword. This may be applied in + multi-threading scenarios, when load distribution issues are observed among the + haproxy threads (this can be monitored with top). + -dS : disable use of the splice() system call. It is equivalent to the "global" section's "nosplice" keyword. This may be used when splice() is suspected to behave improperly or to cause performance issues, or when @@ -381,6 +386,12 @@ list of options is : in checked configurations to fail. This is equivalent to global option "zero-warning". + -dZ : disable forwarding of data in "zero-copy" mode. It is equivalent to the + "global" section's "tune.disable-zero-copy-forwarding" keyword. This may be + helpful in case of issues with data loss or data integrity, or when using + strace to see the forwarded data, as it also disables any kernel tcp + splicing. + -db : disable background mode and multi-process mode. The process remains in foreground. It is mainly used during development or during small tests, as Ctrl-C is enough to stop the process. Never use it in an init script. @@ -419,6 +430,12 @@ list of options is : level name, the list of available keywords is presented. For example it can be convenient to pass 'help' for each field to consult the list first. + -dv : disable the use of the "evports" poller. It is equivalent to the + "global" section's keyword "noevports". It is mostly useful when suspecting + a bug related to this poller. On systems supporting event ports (SunOS + derived from Solaris 10 and later), the fallback will generally be the + "poll" poller. + -m <limit> : limit allocatable memory, which is used to keep process's data, to <limit> megabytes. This may cause some connection refusals or some slowdowns depending on the amount of memory needed for normal operations. 
@@ -3247,8 +3264,8 @@ show sess <id> | older <age> | susp | all is meant to be interpreted while checking function strm_dump_to_buffer() in src/stream.c to figure the exact meaning of certain fields. -show stat [domain <dns|proxy>] [{<iid>|<proxy>} <type> <sid>] [typed|json] \ - [desc] [up|no-maint] +show stat [domain <resolvers|proxy>] [{<iid>|<proxy>} <type> <sid>] \ + [typed|json] [desc] [up|no-maint] Dump statistics. The domain is used to select which statistics to print; resolvers and proxy are available for now. By default, the CSV format is used; you can activate the extended typed output format described in the section above if @@ -870,15 +870,17 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, if (!ret) goto fail; - if (!(curproxy->flags & PR_FL_CHECKED)) { + if (!(curproxy->cap & PR_CAP_DEF) && + !(curproxy->flags & PR_FL_CHECKED)) { /* add the lf_expr to the proxy checks to delay postparsing * since config-related proxy properties are not stable yet */ LIST_APPEND(&curproxy->conf.lf_checks, &lf_expr->list); } else { - /* probably called during runtime or with proxy already checked, - * perform the postcheck right away + /* default proxy, or regular proxy and probably called during + * runtime or with proxy already checked, perform the postcheck + * right away */ if (!lf_expr_postcheck(lf_expr, curproxy, err)) goto fail; @@ -948,11 +950,17 @@ static int lf_expr_postcheck_node_opt(struct lf_expr *lf_expr, struct logformat_ * compatible with logformat expression, but once the proxy is checked, we fail * as soon as we face incompatibilities) * + * If the proxy is a default section, then allow the postcheck to succeed: + * the logformat expression may or may not work properly depending on the + * actual proxy that effectively runs it during runtime, but we have to stay + * permissive since we cannot assume it won't work. + * * It returns 1 on success and 0 on error, <err> will be set in case of error. 
*/ int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err) { struct logformat_node *lf; + int default_px = (px->cap & PR_CAP_DEF); if (!(px->flags & PR_FL_CHECKED)) px->to_log |= LW_INIT; @@ -987,7 +995,8 @@ int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err) px->to_log |= LW_REQ; } else if (lf->type == LOG_FMT_ALIAS) { - if (lf->alias->mode == PR_MODE_HTTP && px->mode != PR_MODE_HTTP) { + if (lf->alias->mode == PR_MODE_HTTP && + !default_px && px->mode != PR_MODE_HTTP) { memprintf(err, "format alias '%s' is reserved for HTTP mode", lf->alias->name); goto fail; @@ -1006,7 +1015,7 @@ int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err) if (!lf_expr_postcheck_node_opt(lf_expr, lf, err)) goto fail; } - if ((px->to_log & (LW_REQ | LW_RESP)) && + if (!default_px && (px->to_log & (LW_REQ | LW_RESP)) && (px->mode != PR_MODE_HTTP && !(px->options & PR_O_HTTP_UPG))) { memprintf(err, "logformat expression not usable here (at least one node depends on HTTP mode)"); goto fail; @@ -1199,7 +1208,7 @@ static int _postcheck_log_backend_compat(struct proxy *be) free_stick_rules(&be->sticking_rules); } if (isttest(be->server_id_hdr_name)) { - ha_warning("Cannot set \"server_id_hdr_name\" with 'mode log' in %s '%s'. It will be ignored.\n", + ha_warning("Cannot set \"http-send-name-header\" with 'mode log' in %s '%s'. It will be ignored.\n", proxy_type_str(be), be->id); err_code |= ERR_WARN; diff --git a/src/mux_h1.c b/src/mux_h1.c index 0c17315..61c8516 100644 --- a/src/mux_h1.c +++ b/src/mux_h1.c @@ -1018,9 +1018,10 @@ static int h1s_must_shut_conn(struct h1s *h1s) /* Really detach the H1S. Most of time of it called from h1_detach() when the * stream is detached from the connection. But if the request message must be - * drained first, the detach is deferred. + * drained first, the detach is deferred. Returns 0 if the h1s is detached but + * h1c is still usable. -1 is returned if h1s was released. 
*/ -static void h1s_finish_detach(struct h1s *h1s) +static int h1s_finish_detach(struct h1s *h1s) { struct h1c *h1c; struct session *sess; @@ -1063,7 +1064,7 @@ static void h1s_finish_detach(struct h1s *h1s) if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) { h1c->conn->owner = NULL; h1c->conn->mux->destroy(h1c); - goto end; + goto released; } /* Always idle at this step */ @@ -1074,7 +1075,7 @@ static void h1s_finish_detach(struct h1s *h1s) if (session_check_idle_conn(sess, h1c->conn)) { /* The connection got destroyed, let's leave */ TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; + goto released; } } else { @@ -1092,13 +1093,13 @@ static void h1s_finish_detach(struct h1s *h1s) /* The server doesn't want it, let's kill the connection right away */ h1c->conn->mux->destroy(h1c); TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; + goto released; } /* At this point, the connection has been added to the * server idle list, so another thread may already have * hijacked it, so we can't do anything with it. 
*/ - return; + goto end; } } @@ -1110,6 +1111,7 @@ static void h1s_finish_detach(struct h1s *h1s) !h1c->conn->owner) { TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn); h1_release(h1c); + goto released; } else { if (h1c->state == H1_CS_IDLE) { @@ -1117,8 +1119,10 @@ static void h1s_finish_detach(struct h1s *h1s) * subscribe for reads waiting for new data */ if (unlikely(b_data(&h1c->ibuf))) { - if (h1_process(h1c) == -1) - goto end; + if (h1_process(h1c) == -1) { + /* h1c was released, don't reuse it anymore */ + goto released; + } } else h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); @@ -1128,6 +1132,11 @@ static void h1s_finish_detach(struct h1s *h1s) } end: TRACE_LEAVE(H1_EV_STRM_END); + return 0; + + released: + TRACE_DEVEL("leaving after releasing the connection", H1_EV_STRM_END); + return -1; } @@ -4017,8 +4026,8 @@ static int h1_process(struct h1c * h1c) h1_shutw_conn(conn); goto release; } - h1s_finish_detach(h1c->h1s); - goto end; + if (h1s_finish_detach(h1c->h1s) == -1) + goto released; } } @@ -4088,6 +4097,7 @@ static int h1_process(struct h1c * h1c) h1_release(h1c); TRACE_DEVEL("leaving after releasing the connection", H1_EV_H1C_WAKE); } + released: return -1; } diff --git a/src/proxy.c b/src/proxy.c index f1d9d7a..3acfdf6 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -191,6 +191,72 @@ void free_server_rules(struct list *srules) } } +/* Frees proxy members that are common to all proxy types (either regular or + * default ones) for a proxy that's about to be destroyed. + * This is a subset of the complete proxy or default proxy deinit code. 
+ */ +static inline void proxy_free_common(struct proxy *px) +{ + struct acl *acl, *aclb; + struct logger *log, *logb; + struct lf_expr *lf, *lfb; + + ha_free(&px->id); + ha_free(&px->conf.file); + ha_free(&px->check_command); + ha_free(&px->check_path); + ha_free(&px->cookie_name); + ha_free(&px->rdp_cookie_name); + ha_free(&px->dyncookie_key); + ha_free(&px->cookie_domain); + ha_free(&px->cookie_attrs); + ha_free(&px->lbprm.arg_str); + ha_free(&px->capture_name); + istfree(&px->monitor_uri); + ha_free(&px->conn_src.iface_name); +#if defined(CONFIG_HAP_TRANSPARENT) + ha_free(&px->conn_src.bind_hdr_name); +#endif + istfree(&px->server_id_hdr_name); + istfree(&px->header_unique_id); + + http_ext_clean(px); + + list_for_each_entry_safe(acl, aclb, &px->acl, list) { + LIST_DELETE(&acl->list); + prune_acl(acl); + free(acl); + } + + free_act_rules(&px->tcp_req.inspect_rules); + free_act_rules(&px->tcp_rep.inspect_rules); + free_act_rules(&px->tcp_req.l4_rules); + free_act_rules(&px->tcp_req.l5_rules); + free_act_rules(&px->http_req_rules); + free_act_rules(&px->http_res_rules); + free_act_rules(&px->http_after_res_rules); + + lf_expr_deinit(&px->logformat); + lf_expr_deinit(&px->logformat_sd); + lf_expr_deinit(&px->logformat_error); + lf_expr_deinit(&px->format_unique_id); + + list_for_each_entry_safe(log, logb, &px->loggers, list) { + LIST_DEL_INIT(&log->list); + free_logger(log); + } + + /* ensure that remaining lf_expr that were not postchecked (ie: disabled + * proxy) don't keep a reference on the proxy which is about to be freed. 
+ */ + list_for_each_entry_safe(lf, lfb, &px->conf.lf_checks, list) + LIST_DEL_INIT(&lf->list); + + chunk_destroy(&px->log_tag); + + free_email_alert(px); +} + void free_proxy(struct proxy *p) { struct server *s; @@ -198,33 +264,21 @@ void free_proxy(struct proxy *p) struct listener *l,*l_next; struct bind_conf *bind_conf, *bind_back; struct acl_cond *cond, *condb; - struct acl *acl, *aclb; struct switching_rule *rule, *ruleb; struct redirect_rule *rdr, *rdrb; - struct logger *log, *logb; struct proxy_deinit_fct *pxdf; struct server_deinit_fct *srvdf; if (!p) return; - free(p->conf.file); - free(p->id); - free(p->cookie_name); - free(p->cookie_domain); - free(p->cookie_attrs); - free(p->lbprm.arg_str); + proxy_free_common(p); + + /* regular proxy specific cleanup */ release_sample_expr(p->lbprm.expr); free(p->server_state_file_name); - free(p->capture_name); - istfree(&p->monitor_uri); - free(p->rdp_cookie_name); free(p->invalid_rep); free(p->invalid_req); -#if defined(CONFIG_HAP_TRANSPARENT) - free(p->conn_src.bind_hdr_name); -#endif - istfree(&p->header_unique_id); if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP) free(p->lbprm.map.srv); @@ -238,12 +292,6 @@ void free_proxy(struct proxy *p) EXTRA_COUNTERS_FREE(p->extra_counters_fe); EXTRA_COUNTERS_FREE(p->extra_counters_be); - list_for_each_entry_safe(acl, aclb, &p->acl, list) { - LIST_DELETE(&acl->list); - prune_acl(acl); - free(acl); - } - free_server_rules(&p->server_rules); list_for_each_entry_safe(rule, ruleb, &p->switching_rules, list) { @@ -260,24 +308,6 @@ void free_proxy(struct proxy *p) http_free_redirect_rule(rdr); } - list_for_each_entry_safe(log, logb, &p->loggers, list) { - LIST_DEL_INIT(&log->list); - free_logger(log); - } - - lf_expr_deinit(&p->logformat); - lf_expr_deinit(&p->logformat_sd); - lf_expr_deinit(&p->format_unique_id); - lf_expr_deinit(&p->logformat_error); - - free_act_rules(&p->tcp_req.inspect_rules); - free_act_rules(&p->tcp_rep.inspect_rules); - 
free_act_rules(&p->tcp_req.l4_rules); - free_act_rules(&p->tcp_req.l5_rules); - free_act_rules(&p->http_req_rules); - free_act_rules(&p->http_res_rules); - free_act_rules(&p->http_after_res_rules); - free_stick_rules(&p->storersp_rules); free_stick_rules(&p->sticking_rules); @@ -348,8 +378,6 @@ void free_proxy(struct proxy *p) free(p->desc); - http_ext_clean(p); - task_destroy(p->task); pool_destroy(p->req_cap_pool); @@ -1421,49 +1449,19 @@ void proxy_preset_defaults(struct proxy *defproxy) } /* Frees all dynamic settings allocated on a default proxy that's about to be - * destroyed. This is a subset of the complete proxy deinit code, but these - * should probably be merged ultimately. Note that most of the fields are not - * even reset, so extreme care is required here, and calling - * proxy_preset_defaults() afterwards would be safer. + * destroyed. Note that most of the fields are not even reset, so extreme care + * is required here, and calling proxy_preset_defaults() afterwards would be + * safer. 
*/ void proxy_free_defaults(struct proxy *defproxy) { - struct acl *acl, *aclb; - struct logger *log, *logb; struct cap_hdr *h,*h_next; - ha_free(&defproxy->id); - ha_free(&defproxy->conf.file); + proxy_free_common(defproxy); + + /* default proxy specific cleanup */ ha_free((char **)&defproxy->defsrv.conf.file); - ha_free(&defproxy->check_command); - ha_free(&defproxy->check_path); - ha_free(&defproxy->cookie_name); - ha_free(&defproxy->rdp_cookie_name); - ha_free(&defproxy->dyncookie_key); - ha_free(&defproxy->cookie_domain); - ha_free(&defproxy->cookie_attrs); - ha_free(&defproxy->lbprm.arg_str); - ha_free(&defproxy->capture_name); - istfree(&defproxy->monitor_uri); ha_free(&defproxy->defbe.name); - ha_free(&defproxy->conn_src.iface_name); - istfree(&defproxy->server_id_hdr_name); - - http_ext_clean(defproxy); - - list_for_each_entry_safe(acl, aclb, &defproxy->acl, list) { - LIST_DELETE(&acl->list); - prune_acl(acl); - free(acl); - } - - free_act_rules(&defproxy->tcp_req.inspect_rules); - free_act_rules(&defproxy->tcp_rep.inspect_rules); - free_act_rules(&defproxy->tcp_req.l4_rules); - free_act_rules(&defproxy->tcp_req.l5_rules); - free_act_rules(&defproxy->http_req_rules); - free_act_rules(&defproxy->http_res_rules); - free_act_rules(&defproxy->http_after_res_rules); h = defproxy->req_cap; while (h) { @@ -1483,19 +1481,6 @@ void proxy_free_defaults(struct proxy *defproxy) h = h_next; } - lf_expr_deinit(&defproxy->logformat); - lf_expr_deinit(&defproxy->logformat_sd); - lf_expr_deinit(&defproxy->logformat_error); - lf_expr_deinit(&defproxy->format_unique_id); - - list_for_each_entry_safe(log, logb, &defproxy->loggers, list) { - LIST_DEL_INIT(&log->list); - free_logger(log); - } - - chunk_destroy(&defproxy->log_tag); - - free_email_alert(defproxy); proxy_release_conf_errors(defproxy); deinit_proxy_tcpcheck(defproxy); diff --git a/src/quic_tx.c b/src/quic_tx.c index 39b9176..84e9f32 100644 --- a/src/quic_tx.c +++ b/src/quic_tx.c @@ -155,16 +155,27 @@ static void 
qc_txb_store(struct buffer *buf, uint16_t length, const size_t hdlen = sizeof(uint16_t) + sizeof(void *); BUG_ON_HOT(b_contig_space(buf) < hdlen); /* this must not happen */ + /* If first packet is INITIAL, ensure datagram is sufficiently padded. */ + BUG_ON(first_pkt->type == QUIC_PACKET_TYPE_INITIAL && + (first_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) && + length < QUIC_INITIAL_PACKET_MINLEN); + write_u16(b_tail(buf), length); write_ptr(b_tail(buf) + sizeof(length), first_pkt); b_add(buf, hdlen + length); } -/* Returns 1 if a packet may be built for <qc> from <qel> encryption level - * with <frms> as ack-eliciting frame list to send, 0 if not. - * <cc> must equal to 1 if an immediate close was asked, 0 if not. - * <probe> must equalt to 1 if a probing packet is required, 0 if not. - * Also set <*must_ack> to inform the caller if an acknowledgement should be sent. +/* Reports if data are ready to be sent for <qel> encryption level on <qc> + * connection. + * + * <frms> is the ack-eliciting frames list to send, if any. Other parameters + * can be set individually for some special frame types : <cc> for immediate + * close, <probe> to emit probing frames. + * + * This function will also set <must_ack> to inform the caller that an + * acknowledgement should be sent. + * + * Returns true if data to emit else false. */ static int qc_may_build_pkt(struct quic_conn *qc, struct list *frms, struct quic_enc_level *qel, int cc, int probe, @@ -330,15 +341,7 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) for (pkt = first_pkt; pkt; pkt = next_pkt) { struct quic_cc *cc = &qc->path->cc; - /* RFC 9000 14.1 Initial datagram size - * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting - * Initial packets to at least the smallest allowed maximum datagram size of - * 1200 bytes. 
- */ qc->cntrs.sent_pkt++; - BUG_ON_HOT(pkt->type == QUIC_PACKET_TYPE_INITIAL && - (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) && - dglen < QUIC_INITIAL_PACKET_MINLEN); pkt->time_sent = time_sent; if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) { @@ -510,7 +513,7 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, list_for_each_entry_safe(qel, tmp_qel, qels, el_send) { struct quic_tls_ctx *tls_ctx; const struct quic_version *ver; - struct list *frms = qel->send_frms, *next_frms; + struct list *frms = qel->send_frms; struct quic_enc_level *next_qel; if (qel == qc->eel) { @@ -521,14 +524,9 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, qc_select_tls_ver(qc, qel, &tls_ctx, &ver); /* Retrieve next QEL. Set it to NULL if on qels last element. */ - if (qel->el_send.n != qels) { - next_qel = LIST_ELEM(qel->el_send.n, struct quic_enc_level *, el_send); - next_frms = next_qel->send_frms; - } - else { - next_qel = NULL; - next_frms = NULL; - } + next_qel = LIST_NEXT(&qel->el_send, struct quic_enc_level *, el_send); + if (&next_qel->el_send == qels) + next_qel = NULL; /* Build as much as datagrams at <qel> encryption level. * Each datagram is prepended with its length followed by the address @@ -546,7 +544,9 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, if (!cc) probe = qel->pktns->tx.pto_probe; - if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) { + /* Remove QEL if nothing to send anymore. Padding is only emitted for last QEL. */ + if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack) && + (!padding || next_qel)) { /* Remove qel from send_list if nothing to send. 
*/ LIST_DEL_INIT(&qel->el_send); qel->send_frms = NULL; @@ -577,30 +577,28 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, } /* RFC 9000 14.1 Initial datagram size - * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting - * Initial packets to at least the smallest allowed maximum datagram size of - * 1200 bytes. * - * Ensure that no ack-eliciting packets are sent into too small datagrams + * Similarly, a server MUST expand the payload of all UDP + * datagrams carrying ack-eliciting Initial packets to at least the + * smallest allowed maximum datagram size of 1200 bytes. */ - if (qel == qc->iel && !LIST_ISEMPTY(frms)) { + if (qel == qc->iel && (!LIST_ISEMPTY(frms) || probe)) { + /* Ensure that no ack-eliciting packets are sent into too small datagrams */ if (end - pos < QUIC_INITIAL_PACKET_MINLEN) { TRACE_PROTO("No more enough room to build an Initial packet", QUIC_EV_CONN_PHPKTS, qc); break; } - /* Pad this Initial packet if there is no ack-eliciting frames to send from - * the next packet number space. - */ - if (!next_frms || LIST_ISEMPTY(next_frms)) - padding = 1; + /* padding will be set for last QEL */ + padding = 1; } pkt_type = quic_enc_level_pkt_type(qc, qel); cur_pkt = qc_build_pkt(&pos, end, qel, tls_ctx, frms, qc, ver, dglen, pkt_type, - must_ack, padding, probe, cc, &err); + must_ack, padding && !next_qel, + probe, cc, &err); switch (err) { case -3: if (first_pkt) @@ -628,6 +626,10 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, total += cur_pkt->len; dglen += cur_pkt->len; + /* Reset padding if datagram is big enough. */ + if (dglen >= QUIC_INITIAL_PACKET_MINLEN) + padding = 0; + if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA) cur_pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA; @@ -647,12 +649,7 @@ static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, * the same datagram, except if <qel> is the Application data * encryption level which cannot be selected to do that. 
*/ - if (LIST_ISEMPTY(frms) && qel != qc->ael && next_qel) { - if (qel == qc->iel && - (!qc_is_listener(qc) || - cur_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) - padding = 1; - + if (LIST_ISEMPTY(frms) && next_qel) { prv_pkt = cur_pkt; } else { diff --git a/src/resolvers.c b/src/resolvers.c index 47b0cce..640c9c2 100644 --- a/src/resolvers.c +++ b/src/resolvers.c @@ -3919,8 +3919,12 @@ static int rslv_promex_metric_info(unsigned int id, struct promex_metric *metric static void *rslv_promex_start_ts(void *unused, unsigned int id) { - struct resolvers *resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list); + struct resolvers *resolver; + if (LIST_ISEMPTY(&sec_resolvers)) + return NULL; + + resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list); return LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list); } |