summary refs log tree commit diff stats
path: root/src/quic_loss.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/quic_loss.c')
-rw-r--r-- src/quic_loss.c 312
1 files changed, 312 insertions, 0 deletions
diff --git a/src/quic_loss.c b/src/quic_loss.c
new file mode 100644
index 0000000..fd9568a
--- /dev/null
+++ b/src/quic_loss.c
@@ -0,0 +1,312 @@
+#include <import/eb64tree.h>
+
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_loss.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+
+#include <haproxy/atomic.h>
+#include <haproxy/list.h>
+#include <haproxy/ticks.h>
+#include <haproxy/trace.h>
+
+/* Feed <ql> QUIC loss information with a fresh <rtt> sample measured on ACK
+ * frame receipt. <ack_delay> is the acknowledgment delay attached to this
+ * sample, which MUST be min(ack->ack_delay, max_ack_delay) before the
+ * handshake is confirmed. <qc> is only used as trace context.
+ *
+ * The very first sample (detected by a null <rtt_min>) initializes <srtt>,
+ * <rtt_var> and <rtt_min>. Any later sample may lower <rtt_min>, then is
+ * folded into <rtt_var> with a 1/4 weight and into <srtt> with a 1/8 weight.
+ */
+void quic_loss_srtt_update(struct quic_loss *ql,
+                           unsigned int rtt, unsigned int ack_delay,
+                           struct quic_conn *qc)
+{
+	int delta;
+
+	TRACE_ENTER(QUIC_EV_CONN_RTTUPDT, qc);
+	TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc, &rtt, &ack_delay, ql);
+
+	ql->latest_rtt = rtt;
+	if (ql->rtt_min) {
+		/* At least one sample was already recorded. */
+		if (rtt < ql->rtt_min)
+			ql->rtt_min = rtt;
+
+		/* Specific to QUIC (RTT adjustment): the acknowledgment delay
+		 * is only subtracted when the result stays at or above the
+		 * minimum RTT.
+		 */
+		if (ack_delay && rtt >= ql->rtt_min + ack_delay)
+			rtt -= ack_delay;
+
+		/* Absolute distance between the smoothed RTT and the sample. */
+		delta = ql->srtt - rtt;
+		if (delta < 0)
+			delta = -delta;
+
+		ql->rtt_var = (3 * ql->rtt_var + delta) / 4;
+		ql->srtt = (7 * ql->srtt + rtt) / 8;
+	}
+	else {
+		/* First measurement ever. */
+		ql->rtt_min = rtt;
+		ql->srtt = rtt;
+		ql->rtt_var = rtt / 2;
+	}
+
+	TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc,,, ql);
+	TRACE_LEAVE(QUIC_EV_CONN_RTTUPDT, qc);
+}
+
+/* Returns for <qc> QUIC connection the packet number space with the earliest
+ * loss time among those which have a loss time set, if any. If no packet
+ * number space has a loss time set, the returned one has TICK_ETERNITY as
+ * packet loss time.
+ *
+ * <qc->pktns_list> must not be empty (enforced by BUG_ON()).
+ */
+struct quic_pktns *quic_loss_pktns(struct quic_conn *qc)
+{
+	struct quic_pktns *pktns, *p;
+
+	TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+
+	BUG_ON(LIST_ISEMPTY(&qc->pktns_list));
+	pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list);
+
+	do {
+		TRACE_PROTO("TX loss pktns", QUIC_EV_CONN_SPTO, qc, p);
+		/* A candidate without loss time (TICK_ETERNITY) must never
+		 * replace a packet number space which has one: tick_is_lt()
+		 * does not special-case TICK_ETERNITY (see haproxy/ticks.h),
+		 * so it must only be called with a set candidate tick.
+		 */
+		if (!tick_isset(pktns->tx.loss_time) ||
+		    (tick_isset(p->tx.loss_time) &&
+		     tick_is_lt(p->tx.loss_time, pktns->tx.loss_time))) {
+			pktns = p;
+		}
+		p = LIST_NEXT(&p->list, struct quic_pktns *, list);
+	} while (&p->list != &qc->pktns_list);
+
+	TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc);
+
+	return pktns;
+}
+
+/* Returns for <qc> QUIC connection the first packet number space to
+ * arm the PTO for if any or a packet number space with TICK_ETERNITY
+ * as PTO value if not.
+ *
+ * <handshake_confirmed> tells if the handshake is confirmed: while it is not,
+ * the scan stops as soon as the application packet number space
+ * (<qc->apktns>) is reached with data in flight. If <pto> is not NULL, it is
+ * set to the PTO expiration tick of the returned packet number space, or to
+ * TICK_ETERNITY if no scanned packet number space had data in flight (the
+ * first packet number space of the list is returned in this case).
+ *
+ * <qc->pktns_list> must not be empty (enforced by BUG_ON()).
+ */
+struct quic_pktns *quic_pto_pktns(struct quic_conn *qc,
+ int handshake_confirmed,
+ unsigned int *pto)
+{
+ unsigned int duration, lpto;
+ struct quic_loss *ql = &qc->path->loss;
+ struct quic_pktns *pktns, *p;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+
+ BUG_ON(LIST_ISEMPTY(&qc->pktns_list));
+ duration =
+ ql->srtt +
+ (QUIC_MAX(4 * ql->rtt_var, QUIC_TIMER_GRANULARITY) << ql->pto_count);
+
+ /* RFC 9002 6.2.2.1. Before Address Validation
+ *
+ * the client MUST set the PTO timer if the client has not received an
+ * acknowledgment for any of its Handshake packets and the handshake is
+ * not confirmed (see Section 4.1.2 of [QUIC-TLS]), even if there are no
+ * packets in flight.
+ *
+ * TODO implement the above paragraph for QUIC on backend side. Note
+ * that if now_ms is used this function is not reentrant anymore and can
+ * not be used anytime without side-effect (for example after QUIC
+ * connection migration).
+ *
+ * NOTE(review): in <duration> above only the
+ * max(4 * rtt_var, QUIC_TIMER_GRANULARITY) term is scaled by 2^pto_count,
+ * <srtt> is not, whereas the RFC 9002 A.8 pseudocode scales the whole
+ * sum. To be confirmed whether this is intentional.
+ */
+
+ lpto = TICK_ETERNITY; /* no PTO candidate found yet */
+ pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list);
+
+ do {
+ unsigned int tmp_pto;
+
+ if (p->tx.in_flight) { /* only spaces with data in flight are candidates */
+ if (p == qc->apktns) {
+ if (!handshake_confirmed) {
+ TRACE_STATE("TX PTO handshake not already confirmed", QUIC_EV_CONN_SPTO, qc);
+ goto out; /* stop the scan, keeping the candidate found so far */
+ }
+
+ duration += qc->max_ack_delay << ql->pto_count; /* <max_ack_delay> scaled by 2^pto_count; <duration> stays modified for any later iteration */
+ }
+
+ tmp_pto = tick_add(p->tx.time_of_last_eliciting, duration);
+ if (!tick_isset(lpto) || tick_is_lt(tmp_pto, lpto)) { /* keep the earliest expiration */
+ lpto = tmp_pto;
+ pktns = p;
+ }
+
+ TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, p);
+ }
+
+ p = LIST_NEXT(&p->list, struct quic_pktns *, list);
+ } while (&p->list != &qc->pktns_list);
+
+ out:
+ if (pto)
+ *pto = lpto;
+ TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, pktns, &duration);
+ TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc);
+
+ return pktns;
+}
+
+/* Look for packet loss among the sent packets of <pktns> packet number space
+ * for <qc> connection. The packets are scanned in packet number order:
+ *  - the scan stops at the first packet with a packet number greater than
+ *    the largest acknowledged one;
+ *  - a packet deemed lost (time threshold elapsed or packet reordering
+ *    threshold reached) is removed from the <pktns> TX packets tree and
+ *    appended to <lost_pkts> list;
+ *  - the scan stops at the first packet not deemed lost, the <loss_time> of
+ *    <pktns> being set to the tick after which this packet will be deemed
+ *    lost.
+ * The <loss_time> of <pktns> is TICK_ETERNITY in every other case.
+ * Should be called after having received an ACK frame with newly acknowledged
+ * packets or when the loss detection timer has expired.
+ * Always succeeds.
+ */
+void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc,
+                           struct list *lost_pkts)
+{
+	struct eb_root *pkts;
+	struct eb64_node *node;
+	struct quic_loss *ql;
+	unsigned int loss_delay;
+	uint64_t pktthresh;
+	int64_t largest_acked_pn;
+
+	TRACE_ENTER(QUIC_EV_CONN_PKTLOSS, qc);
+	TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns);
+	pkts = &pktns->tx.pkts;
+	pktns->tx.loss_time = TICK_ETERNITY;
+	if (eb_is_empty(pkts))
+		goto out;
+
+	ql = &qc->path->loss;
+	loss_delay = QUIC_MAX(ql->latest_rtt, ql->srtt);
+	loss_delay = QUIC_MAX(loss_delay, MS_TO_TICKS(QUIC_TIMER_GRANULARITY)) *
+		QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND / QUIC_LOSS_TIME_THRESHOLD_DIVISOR;
+
+	node = eb64_first(pkts);
+
+	/* RFC 9002 6.1.1. Packet Threshold
+	 * The RECOMMENDED initial value for the packet reordering threshold
+	 * (kPacketThreshold) is 3, based on best practices for TCP loss detection
+	 * [RFC5681] [RFC6675]. In order to remain similar to TCP, implementations
+	 * SHOULD NOT use a packet threshold less than 3; see [RFC5681].
+	 *
+	 * Some networks may exhibit higher degrees of packet reordering, causing a
+	 * sender to detect spurious losses. Additionally, packet reordering could be
+	 * more common with QUIC than TCP because network elements that could observe
+	 * and reorder TCP packets cannot do that for QUIC and also because QUIC
+	 * packet numbers are encrypted.
+	 */
+
+	/* Dynamic packet reordering threshold calculation depending on the distance
+	 * (in packets) between the last transmitted packet and the oldest still in
+	 * flight before loss detection.
+	 */
+	pktthresh = pktns->tx.next_pn - 1 - node->key;
+	/* Apply a ratio to this threshold and add it to QUIC_LOSS_PACKET_THRESHOLD. */
+	pktthresh = pktthresh * global.tune.quic_reorder_ratio / 100 + QUIC_LOSS_PACKET_THRESHOLD;
+
+	/* Nothing in the loop below updates the largest acknowledged packet number. */
+	largest_acked_pn = pktns->rx.largest_acked_pn;
+	while (node) {
+		struct quic_tx_packet *pkt;
+		unsigned int loss_time_limit, time_sent;
+		int reordered;
+
+		pkt = eb64_entry(node, struct quic_tx_packet, pn_node);
+		/* Move forward first: <pkt> may be removed from the tree below. */
+		node = eb64_next(node);
+		if ((int64_t)pkt->pn_node.key > largest_acked_pn)
+			break;
+
+		time_sent = pkt->time_sent;
+		loss_time_limit = tick_add(time_sent, loss_delay);
+
+		/* <largest_acked_pn> is not lower than the packet number at
+		 * this point: the comparison is done on unsigned values.
+		 */
+		reordered = (uint64_t)largest_acked_pn >= pkt->pn_node.key + pktthresh;
+		if (reordered)
+			ql->nb_reordered_pkt++;
+
+		if (tick_is_le(loss_time_limit, now_ms) || reordered) {
+			eb64_delete(&pkt->pn_node);
+			LIST_APPEND(lost_pkts, &pkt->list);
+			ql->nb_lost_pkt++;
+		}
+		else {
+			/* <loss_time> was reset to TICK_ETERNITY above and the
+			 * scan stops here, so there is no previous value to
+			 * compare with.
+			 */
+			pktns->tx.loss_time = loss_time_limit;
+			break;
+		}
+	}
+
+ out:
+	TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns, lost_pkts);
+	TRACE_LEAVE(QUIC_EV_CONN_PKTLOSS, qc);
+}
+
+/* Handle <pkts> list of lost packets for <qc> connection: for each of them,
+ * the in flight counters of its packet number space and of the path are
+ * decreased, its TX frames are treated by qc_handle_frms_of_lost_pkt() with
+ * <pktns->tx.frms> as frame list, and it is removed from <pkts>. A packet
+ * loss event carrying the sending time of the newest lost packet is then
+ * sent to the congestion controller, and the persistent congestion is
+ * checked if an RTT has already been sampled and several packets were lost.
+ * A reference is also released on every packet of <pkts> list with
+ * quic_tx_packet_refdec().
+ *
+ * Note that <now_us> is not used by this function: <now_ms> is the date
+ * passed to the persistent congestion check.
+ *
+ * Returns 1 on success (including when <pkts> is empty) else 0 if
+ * qc_handle_frms_of_lost_pkt() failed for at least one packet (loss limit
+ * exceeded). A CONNECTION_CLOSE was prepared to close the connection ASAP.
+ * In this case the congestion controller is not notified.
+ */
+int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
+                         struct list *pkts, uint64_t now_us)
+{
+	struct quic_tx_packet *pkt, *tmp, *oldest_lost, *newest_lost;
+	int close = 0;
+
+	TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+	if (LIST_ISEMPTY(pkts))
+		goto leave;
+
+	oldest_lost = newest_lost = NULL;
+	list_for_each_entry_safe(pkt, tmp, pkts, list) {
+		pkt->pktns->tx.in_flight -= pkt->in_flight_len;
+		qc->path->prep_in_flight -= pkt->in_flight_len;
+		qc->path->in_flight -= pkt->in_flight_len;
+		if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+			qc->path->ifae_pkts--;
+		/* Treat the frames of this lost packet. */
+		if (!qc_handle_frms_of_lost_pkt(qc, pkt, &pktns->tx.frms))
+			close = 1;
+		LIST_DELETE(&pkt->list);
+		if (!oldest_lost) {
+			oldest_lost = newest_lost = pkt;
+		}
+		else {
+			/* Only the oldest and the newest lost packets are
+			 * needed after this loop: release the intermediate
+			 * one as soon as it is superseded.
+			 */
+			if (newest_lost != oldest_lost)
+				quic_tx_packet_refdec(newest_lost);
+			newest_lost = pkt;
+		}
+	}
+
+	if (!close) {
+		if (newest_lost) {
+			/* Send a congestion event to the controller */
+			struct quic_cc_event ev = { };
+
+			ev.type = QUIC_CC_EVT_LOSS;
+			ev.loss.time_sent = newest_lost->time_sent;
+
+			quic_cc_event(&qc->path->cc, &ev);
+		}
+
+		/* If an RTT has already been sampled, <rtt_min> has been set.
+		 * We must check if we are experiencing a persistent congestion.
+		 * If this is the case, the congestion controller must re-enter
+		 * slow start state.
+		 */
+		if (qc->path->loss.rtt_min && newest_lost != oldest_lost) {
+			unsigned int period = newest_lost->time_sent - oldest_lost->time_sent;
+
+			if (quic_loss_persistent_congestion(&qc->path->loss, period,
+			                                    now_ms, qc->max_ack_delay))
+				qc->path->cc.algo->slow_start(&qc->path->cc);
+		}
+	}
+
+	/* <oldest_lost> cannot be NULL at this stage because we have ensured
+	 * that <pkts> list is not empty. Without this, GCC 12.2.0 reports a
+	 * possible overflow on a 0 byte region with O2 optimization.
+	 */
+	ALREADY_CHECKED(oldest_lost);
+	quic_tx_packet_refdec(oldest_lost);
+	if (newest_lost != oldest_lost)
+		quic_tx_packet_refdec(newest_lost);
+
+ leave:
+	TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+	return !close;
+}