#include #include #include #include #include #include #include #include #include /* Update QUIC loss information with new measurement and * on ACK frame receipt which MUST be min(ack->ack_delay, max_ack_delay) * before the handshake is confirmed. */ void quic_loss_srtt_update(struct quic_loss *ql, unsigned int rtt, unsigned int ack_delay, struct quic_conn *qc) { TRACE_ENTER(QUIC_EV_CONN_RTTUPDT, qc); TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc, &rtt, &ack_delay, ql); ql->latest_rtt = rtt; if (!ql->rtt_min) { /* No previous measurement. */ ql->srtt = rtt; ql->rtt_var = rtt / 2; ql->rtt_min = rtt; } else { int diff; ql->rtt_min = QUIC_MIN(rtt, ql->rtt_min); /* Specific to QUIC (RTT adjustment). */ if (ack_delay && rtt >= ql->rtt_min + ack_delay) rtt -= ack_delay; diff = ql->srtt - rtt; if (diff < 0) diff = -diff; ql->rtt_var = (3 * ql->rtt_var + diff) / 4; ql->srtt = (7 * ql->srtt + rtt) / 8; } TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc,,, ql); TRACE_LEAVE(QUIC_EV_CONN_RTTUPDT, qc); } /* Returns for QUIC connection the first packet number space which * experienced packet loss, if any or a packet number space with * TICK_ETERNITY as packet loss time if not. */ struct quic_pktns *quic_loss_pktns(struct quic_conn *qc) { struct quic_pktns *pktns, *p; TRACE_ENTER(QUIC_EV_CONN_SPTO, qc); BUG_ON(LIST_ISEMPTY(&qc->pktns_list)); pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list); do { TRACE_PROTO("TX loss pktns", QUIC_EV_CONN_SPTO, qc, p); if (!tick_isset(pktns->tx.loss_time) || tick_is_lt(p->tx.loss_time, pktns->tx.loss_time)) { pktns = p; } p = LIST_NEXT(&p->list, struct quic_pktns *, list); } while (&p->list != &qc->pktns_list); TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc); return pktns; } /* Returns for QUIC connection the first packet number space to * arm the PTO for if any or a packet number space with TICK_ETERNITY * as PTO value if not. */ struct quic_pktns *quic_pto_pktns(struct quic_conn *qc, int handshake_confirmed, unsigned int *pto) { unsigned int duration, lpto; struct quic_loss *ql = &qc->path->loss; struct quic_pktns *pktns, *p; TRACE_ENTER(QUIC_EV_CONN_SPTO, qc); BUG_ON(LIST_ISEMPTY(&qc->pktns_list)); duration = ql->srtt + (QUIC_MAX(4 * ql->rtt_var, QUIC_TIMER_GRANULARITY) << ql->pto_count); /* RFC 9002 6.2.2.1. Before Address Validation * * the client MUST set the PTO timer if the client has not received an * acknowledgment for any of its Handshake packets and the handshake is * not confirmed (see Section 4.1.2 of [QUIC-TLS]), even if there are no * packets in flight. * * TODO implement the above paragraph for QUIC on backend side. Note * that if now_ms is used this function is not reentrant anymore and can * not be used anytime without side-effect (for example after QUIC * connection migration). */ lpto = TICK_ETERNITY; pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list); do { unsigned int tmp_pto; if (p->tx.in_flight) { if (p == qc->apktns) { if (!handshake_confirmed) { TRACE_STATE("TX PTO handshake not already confirmed", QUIC_EV_CONN_SPTO, qc); goto out; } duration += qc->max_ack_delay << ql->pto_count; } tmp_pto = tick_add(p->tx.time_of_last_eliciting, duration); if (!tick_isset(lpto) || tick_is_lt(tmp_pto, lpto)) { lpto = tmp_pto; pktns = p; } TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, p); } p = LIST_NEXT(&p->list, struct quic_pktns *, list); } while (&p->list != &qc->pktns_list); out: if (pto) *pto = lpto; TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, pktns, &duration); TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc); return pktns; } /* Look for packet loss from sent packets for encryption level of a * connection with as I/O handler context. If remove is true, remove them from * their tree if deemed as lost or set the value the packet number * space if any not deemed lost. * Should be called after having received an ACK frame with newly acknowledged * packets or when the the loss detection timer has expired. * Always succeeds. */ void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc, struct list *lost_pkts) { struct eb_root *pkts; struct eb64_node *node; struct quic_loss *ql; unsigned int loss_delay; uint64_t pktthresh; TRACE_ENTER(QUIC_EV_CONN_PKTLOSS, qc); TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns); pkts = &pktns->tx.pkts; pktns->tx.loss_time = TICK_ETERNITY; if (eb_is_empty(pkts)) goto out; ql = &qc->path->loss; loss_delay = QUIC_MAX(ql->latest_rtt, ql->srtt); loss_delay = QUIC_MAX(loss_delay, MS_TO_TICKS(QUIC_TIMER_GRANULARITY)) * QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND / QUIC_LOSS_TIME_THRESHOLD_DIVISOR; node = eb64_first(pkts); /* RFC 9002 6.1.1. Packet Threshold * The RECOMMENDED initial value for the packet reordering threshold * (kPacketThreshold) is 3, based on best practices for TCP loss detection * [RFC5681] [RFC6675]. In order to remain similar to TCP, implementations * SHOULD NOT use a packet threshold less than 3; see [RFC5681]. * Some networks may exhibit higher degrees of packet reordering, causing a * sender to detect spurious losses. Additionally, packet reordering could be * more common with QUIC than TCP because network elements that could observe * and reorder TCP packets cannot do that for QUIC and also because QUIC * packet numbers are encrypted. */ /* Dynamic packet reordering threshold calculation depending on the distance * (in packets) between the last transmitted packet and the oldest still in * flight before loss detection. */ pktthresh = pktns->tx.next_pn - 1 - eb64_entry(node, struct quic_tx_packet, pn_node)->pn_node.key; /* Apply a ratio to this threshold and add it to QUIC_LOSS_PACKET_THRESHOLD. */ pktthresh = pktthresh * global.tune.quic_reorder_ratio / 100 + QUIC_LOSS_PACKET_THRESHOLD; while (node) { struct quic_tx_packet *pkt; int64_t largest_acked_pn; unsigned int loss_time_limit, time_sent; int reordered; pkt = eb64_entry(&node->node, struct quic_tx_packet, pn_node); largest_acked_pn = pktns->rx.largest_acked_pn; node = eb64_next(node); if ((int64_t)pkt->pn_node.key > largest_acked_pn) break; time_sent = pkt->time_sent; loss_time_limit = tick_add(time_sent, loss_delay); reordered = (int64_t)largest_acked_pn >= pkt->pn_node.key + pktthresh; if (reordered) ql->nb_reordered_pkt++; if (tick_is_le(loss_time_limit, now_ms) || reordered) { eb64_delete(&pkt->pn_node); LIST_APPEND(lost_pkts, &pkt->list); ql->nb_lost_pkt++; } else { if (tick_isset(pktns->tx.loss_time)) pktns->tx.loss_time = tick_first(pktns->tx.loss_time, loss_time_limit); else pktns->tx.loss_time = loss_time_limit; break; } } out: TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns, lost_pkts); TRACE_LEAVE(QUIC_EV_CONN_PKTLOSS, qc); } /* Handle list of lost packets detected at handling their TX * frames. Send a packet loss event to the congestion controller if in flight * packet have been lost. Also frees the packet in list. * * Returns 1 on success else 0 if loss limit has been exceeded. A * CONNECTION_CLOSE was prepared to close the connection ASAP. */ int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns, struct list *pkts, uint64_t now_us) { struct quic_tx_packet *pkt, *tmp, *oldest_lost, *newest_lost; int close = 0; TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc); if (LIST_ISEMPTY(pkts)) goto leave; oldest_lost = newest_lost = NULL; list_for_each_entry_safe(pkt, tmp, pkts, list) { struct list tmp = LIST_HEAD_INIT(tmp); pkt->pktns->tx.in_flight -= pkt->in_flight_len; qc->path->prep_in_flight -= pkt->in_flight_len; qc->path->in_flight -= pkt->in_flight_len; if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) qc->path->ifae_pkts--; /* Treat the frames of this lost packet. */ if (!qc_handle_frms_of_lost_pkt(qc, pkt, &pktns->tx.frms)) close = 1; LIST_DELETE(&pkt->list); if (!oldest_lost) { oldest_lost = newest_lost = pkt; } else { if (newest_lost != oldest_lost) quic_tx_packet_refdec(newest_lost); newest_lost = pkt; } } if (!close) { if (newest_lost) { /* Sent a congestion event to the controller */ struct quic_cc_event ev = { }; ev.type = QUIC_CC_EVT_LOSS; ev.loss.time_sent = newest_lost->time_sent; quic_cc_event(&qc->path->cc, &ev); } /* If an RTT have been already sampled, has been set. * We must check if we are experiencing a persistent congestion. * If this is the case, the congestion controller must re-enter * slow start state. */ if (qc->path->loss.rtt_min && newest_lost != oldest_lost) { unsigned int period = newest_lost->time_sent - oldest_lost->time_sent; if (quic_loss_persistent_congestion(&qc->path->loss, period, now_ms, qc->max_ack_delay)) qc->path->cc.algo->slow_start(&qc->path->cc); } } /* cannot be NULL at this stage because we have ensured * that list is not empty. Without this, GCC 12.2.0 reports a * possible overflow on a 0 byte region with O2 optimization. */ ALREADY_CHECKED(oldest_lost); quic_tx_packet_refdec(oldest_lost); if (newest_lost != oldest_lost) quic_tx_packet_refdec(newest_lost); leave: TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc); return !close; }