Diffstat (limited to 'src/quic_cc_cubic.c')
-rw-r--r-- | src/quic_cc_cubic.c | 283
1 file changed, 283 insertions, 0 deletions
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
new file mode 100644
index 0000000..dc6ef9f
--- /dev/null
+++ b/src/quic_cc_cubic.c
@@ -0,0 +1,283 @@
+#include <haproxy/trace.h>
+#include <haproxy/quic_cc.h>
+
+/* This source file is highly inspired by the Linux kernel source file
+ * implementing TCP CUBIC: we have no other choice, since avoiding any
+ * floating point operation is the only way to be fast!
+ * (See net/ipv4/tcp_cubic.c)
+ */
+#define TRACE_SOURCE &trace_quic
+
+#define CUBIC_BETA_SCALE       1024
+#define CUBIC_BETA_SCALE_SHIFT 10
+/* beta = 0.7 ; C = 0.4 */
+#define CUBIC_BETA 717 /* CUBIC_BETA / CUBIC_BETA_SCALE = 0.7 */
+#define CUBIC_C    410 /* CUBIC_C / CUBIC_BETA_SCALE = 0.4 */
+
+#define CUBIC_BETA_SCALE_FACTOR_SHIFT (3 * CUBIC_BETA_SCALE_SHIFT)
+#define TIME_SCALE_FACTOR_SHIFT 10
+
+/* The maximum value which may be cubed and multiplied by CUBIC_C
+ * without overflowing a 64-bit integer.
+ */
+#define CUBIC_DIFF_TIME_LIMIT 355535ULL /* ms */
+
+/* K cube factor: (1 - beta) / C */
+struct cubic {
+        uint32_t ssthresh;
+        uint32_t remaining_inc;
+        uint32_t remaining_tcp_inc;
+        uint32_t epoch_start;
+        uint32_t origin_point;
+        uint32_t K;
+        uint32_t last_w_max;
+        uint32_t tcp_wnd;
+        uint32_t recovery_start_time;
+};
+
+static void quic_cc_cubic_reset(struct quic_cc *cc)
+{
+        struct cubic *c = quic_cc_priv(cc);
+
+        cc->algo->state = QUIC_CC_ST_SS;
+
+        c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
+        c->remaining_inc = 0;
+        c->remaining_tcp_inc = 0;
+        c->epoch_start = 0;
+        c->origin_point = 0;
+        c->K = 0;
+        c->last_w_max = 0;
+        c->tcp_wnd = 0;
+        c->recovery_start_time = 0;
+}
+
+static int quic_cc_cubic_init(struct quic_cc *cc)
+{
+        quic_cc_cubic_reset(cc);
+        return 1;
+}
+
+/* Cubic root.
+ * Highly inspired by the Linux kernel sources.
+ * See net/ipv4/tcp_cubic.c
+ */
+static uint32_t cubic_root(uint64_t val)
+{
+        uint32_t x, b, shift;
+
+        static const uint8_t v[] = {
+                  0,  54,  54,  54, 118, 118, 118, 118,
+                123, 129, 134, 138, 143, 147, 151, 156,
+                157, 161, 164, 168, 170, 173, 176, 179,
+                181, 185, 187, 190, 192, 194, 197, 199,
+                200, 202, 204, 206, 209, 211, 213, 215,
+                217, 219, 221, 222, 224, 225, 227, 229,
+                231, 232, 234, 236, 237, 239, 240, 242,
+                244, 245, 246, 248, 250, 251, 252, 254,
+        };
+
+        if (!val || (b = my_flsl(val)) < 7) {
+                /* val in [0..63] */
+                return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
+        }
+
+        /* Build a first estimate from the 64-entry lookup table. */
+        b = ((b * 84) >> 8) - 1;
+        shift = (val >> (b * 3));
+
+        x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;
+
+        /* One Newton-Raphson iteration to refine the estimate:
+         * x = (2 * x + val / x^2) / 3, with x^2 approximated by
+         * x * (x - 1) and the division by 3 by a multiplication
+         * by 341 / 1024.
+         */
+        x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
+        x = ((x * 341) >> 10);
+
+        return x;
+}
+
+static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
+{
+        struct cubic *c = quic_cc_priv(cc);
+        struct quic_path *path = container_of(cc, struct quic_path, cc);
+        uint32_t t, target, inc, inc_diff;
+        uint64_t delta, diff;
+
+        if (!c->epoch_start) {
+                c->epoch_start = now_ms;
+                if (c->last_w_max <= path->cwnd) {
+                        c->K = 0;
+                        c->origin_point = path->cwnd;
+                }
+                else {
+                        /* K = cubic_root((1 - beta) * W_max / C) */
+                        c->K = cubic_root((c->last_w_max - path->cwnd) *
+                                          (CUBIC_BETA_SCALE - CUBIC_BETA) / CUBIC_C / path->mtu) << TIME_SCALE_FACTOR_SHIFT;
+                        c->origin_point = c->last_w_max;
+                }
+
+                c->tcp_wnd = path->cwnd;
+                c->remaining_inc = 0;
+                c->remaining_tcp_inc = 0;
+        }
+
+        t = now_ms + path->loss.rtt_min - c->epoch_start;
+        if (t < c->K) {
+                diff = c->K - t;
+        }
+        else {
+                diff = t - c->K;
+        }
+
+        if (diff > CUBIC_DIFF_TIME_LIMIT) {
+                /* TODO: this should not happen if we handled the case of
+                 * very late ACK receipt. This must be handled as a
+                 * congestion control event: a very late ACK should trigger
+                 * a congestion control algorithm reset.
+                 */
+                quic_cc_cubic_reset(cc);
+                return;
+        }
+
+        /* Cubic curve: W(t) = C * (t - K)^3 + W_max, evaluated here as
+         * delta = |W(t) - W_max| in bytes.
+         */
+        delta = path->mtu * ((CUBIC_C * diff * diff * diff) >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
+        if (t < c->K)
+                target = c->origin_point - delta;
+        else
+                target = c->origin_point + delta;
+
+        if (target > path->cwnd) {
+                inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
+                c->remaining_inc = inc_diff % path->cwnd;
+                inc = inc_diff / path->cwnd;
+        }
+        else {
+                /* small increment */
+                inc_diff = c->remaining_inc + path->mtu;
+                c->remaining_inc = inc_diff % (100 * path->cwnd);
+                inc = inc_diff / (100 * path->cwnd);
+        }
+
+        /* TCP friendliness: estimate the window a Reno-like AIMD flow
+         * would have reached, and never grow slower than it.
+         */
+        inc_diff = c->remaining_tcp_inc + path->mtu * acked;
+        c->tcp_wnd += inc_diff / path->cwnd;
+        c->remaining_tcp_inc = inc_diff % path->cwnd;
+        if (c->tcp_wnd > path->cwnd) {
+                uint32_t tcp_inc = path->mtu * (c->tcp_wnd - path->cwnd) / path->cwnd;
+                if (tcp_inc > inc)
+                        inc = tcp_inc;
+        }
+
+        path->cwnd += inc;
+}
+
+static void quic_cc_cubic_slow_start(struct quic_cc *cc)
+{
+        quic_cc_cubic_reset(cc);
+}
+
+static void quic_enter_recovery(struct quic_cc *cc)
+{
+        struct quic_path *path = container_of(cc, struct quic_path, cc);
+        struct cubic *c = quic_cc_priv(cc);
+
+        c->epoch_start = 0;
+        c->recovery_start_time = now_ms;
+        /* Fast convergence */
+        if (path->cwnd < c->last_w_max) {
+                /* (1 + beta) * path->cwnd / 2 */
+                c->last_w_max = (path->cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2) >> CUBIC_BETA_SCALE_SHIFT;
+        }
+        else {
+                c->last_w_max = path->cwnd;
+        }
+        path->cwnd = (CUBIC_BETA * path->cwnd) >> CUBIC_BETA_SCALE_SHIFT;
+        c->ssthresh = QUIC_MAX(path->cwnd, path->min_cwnd);
+}
+
+/* Congestion slow-start callback. */
+static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+        struct quic_path *path = container_of(cc, struct quic_path, cc);
+        struct cubic *c = quic_cc_priv(cc);
+
+        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+        switch (ev->type) {
+        case QUIC_CC_EVT_ACK:
+                /* Do not increase the congestion window in recovery period. */
+                if (ev->ack.time_sent <= c->recovery_start_time)
+                        goto out;
+
+                path->cwnd += ev->ack.acked;
+                /* Exit to congestion avoidance if the slow start threshold
+                 * is reached.
+                 */
+                if (path->cwnd >= c->ssthresh)
+                        cc->algo->state = QUIC_CC_ST_CA;
+                break;
+
+        case QUIC_CC_EVT_LOSS:
+                /* Do not decrease the congestion window when already in
+                 * recovery period.
+                 */
+                if (ev->loss.time_sent <= c->recovery_start_time)
+                        goto out;
+
+                quic_enter_recovery(cc);
+                /* Exit to congestion avoidance. */
+                cc->algo->state = QUIC_CC_ST_CA;
+                break;
+
+        case QUIC_CC_EVT_ECN_CE:
+                /* TODO */
+                break;
+        }
+
+ out:
+        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+        struct cubic *c = quic_cc_priv(cc);
+
+        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+        switch (ev->type) {
+        case QUIC_CC_EVT_ACK:
+                /* Do not increase the congestion window when already in
+                 * recovery period.
+                 */
+                if (ev->ack.time_sent <= c->recovery_start_time)
+                        goto out;
+
+                quic_cubic_update(cc, ev->ack.acked);
+                break;
+        case QUIC_CC_EVT_LOSS:
+                /* Do not decrease the congestion window when already in
+                 * recovery period.
+                 */
+                if (ev->loss.time_sent <= c->recovery_start_time)
+                        goto out;
+
+                quic_enter_recovery(cc);
+                break;
+        case QUIC_CC_EVT_ECN_CE:
+                /* TODO */
+                break;
+        }
+
+ out:
+        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
+                                         struct quic_cc_event *ev) = {
+        [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
+        [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
+};
+
+static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+        quic_cc_cubic_state_cbs[cc->algo->state](cc, ev);
+}
+
+static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+}
+
+struct quic_cc_algo quic_cc_algo_cubic = {
+        .type        = QUIC_CC_ALGO_TP_CUBIC,
+        .init        = quic_cc_cubic_init,
+        .event       = quic_cc_cubic_event,
+        .slow_start  = quic_cc_cubic_slow_start,
+        .state_trace = quic_cc_cubic_state_trace,
+};
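For readers who want to poke at the fixed-point math outside of haproxy, here is a small self-contained harness for the cube root above. It is a sketch, not part of the patch: the table and function body are copied from the diff, but haproxy's my_flsl() is replaced by a hypothetical flsl64() stand-in built on a GCC/clang builtin, assumed to return the 1-based index of the highest set bit.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for haproxy's my_flsl(): 1-based index of the
 * highest set bit, 0 for 0. Assumed to match my_flsl()'s semantics.
 */
static uint32_t flsl64(uint64_t val)
{
	return val ? 64 - __builtin_clzll(val) : 0;
}

/* Copied from the patch, my_flsl() substituted. */
static uint32_t cubic_root(uint64_t val)
{
	uint32_t x, b, shift;

	static const uint8_t v[] = {
		  0,  54,  54,  54, 118, 118, 118, 118,
		123, 129, 134, 138, 143, 147, 151, 156,
		157, 161, 164, 168, 170, 173, 176, 179,
		181, 185, 187, 190, 192, 194, 197, 199,
		200, 202, 204, 206, 209, 211, 213, 215,
		217, 219, 221, 222, 224, 225, 227, 229,
		231, 232, 234, 236, 237, 239, 240, 242,
		244, 245, 246, 248, 250, 251, 252, 254,
	};

	if (!val || (b = flsl64(val)) < 7) {
		/* val in [0..63] */
		return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
	}

	b = ((b * 84) >> 8) - 1;
	shift = (val >> (b * 3));

	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
	x = ((x * 341) >> 10);

	return x;
}

int main(void)
{
	/* cubic_root(n^3) should come back equal or very close to n. */
	for (uint64_t n = 2; n <= 1 << 20; n *= 4)
		printf("cubic_root(%llu^3) = %u (expected %llu)\n",
		       (unsigned long long)n, cubic_root(n * n * n),
		       (unsigned long long)n);
	return 0;
}
```

The result is exact for small inputs and within a fraction of a percent for large ones, which is plenty for sizing a congestion window.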
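The heart of quic_cubic_update() is the CUBIC curve W(t) = C * (t - K)^3 + W_max evaluated in fixed point: CUBIC_C is 0.4 scaled by 1024, and the shift by 10 + 3 * TIME_SCALE_FACTOR_SHIFT effectively divides the time difference by 1024 before cubing, i.e. time is counted in units of 1024 ms (roughly a second; this reading of the units is an inference from the constants, not something the patch states). The sketch below replays just the delta/target computation with made-up values for mtu, origin_point and K; none of these numbers come from the patch.

```c
#include <stdint.h>
#include <stdio.h>

#define CUBIC_C                 410 /* 0.4 scaled by 1024 */
#define TIME_SCALE_FACTOR_SHIFT 10

int main(void)
{
	/* Hypothetical values: 1252-byte datagrams, a window that peaked
	 * at 100 packets before the last loss (origin_point), and K, the
	 * time where the curve comes back to origin_point, set to 4 time
	 * units of 1024 ms each.
	 */
	uint64_t mtu = 1252;
	uint64_t origin_point = 100 * mtu;
	uint64_t K = 4 << TIME_SCALE_FACTOR_SHIFT;

	for (uint32_t t = 0; t <= 8192; t += 1024) {
		uint64_t diff = t < K ? K - t : t - K;
		/* Same arithmetic as the patch:
		 * delta = mtu * (CUBIC_C / 1024) * (diff / 1024)^3.
		 */
		uint64_t delta = mtu * ((CUBIC_C * diff * diff * diff)
		                        >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
		uint64_t target = t < K ? origin_point - delta
		                        : origin_point + delta;
		printf("t=%4u target=%6llu bytes (%llu packets)\n", t,
		       (unsigned long long)target,
		       (unsigned long long)(target / mtu));
	}
	return 0;
}
```

The output traces the characteristic CUBIC shape: a fast climb toward origin_point, a plateau around t = K, then increasingly aggressive probing beyond the previous maximum.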
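The recovery path in quic_enter_recovery() is plain fixed-point arithmetic as well: on loss the window is multiplied by beta = 717/1024 (about 0.7), and under fast convergence the recorded maximum is deflated to (1 + beta)/2, about 0.85 of the current window, so that a flow that keeps losing backs off its target too. A minimal sketch of that arithmetic with hypothetical values:

```c
#include <stdint.h>
#include <stdio.h>

#define CUBIC_BETA_SCALE       1024
#define CUBIC_BETA_SCALE_SHIFT 10
#define CUBIC_BETA             717 /* 0.7 scaled by 1024 */

/* Same arithmetic as quic_enter_recovery(): update *last_w_max and
 * return the window shrunk by beta.
 */
static uint64_t on_loss(uint64_t cwnd, uint64_t *last_w_max)
{
	if (cwnd < *last_w_max) {
		/* Fast convergence: (1 + beta) * cwnd / 2 ~= 0.85 * cwnd */
		*last_w_max = (cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2)
		              >> CUBIC_BETA_SCALE_SHIFT;
	}
	else
		*last_w_max = cwnd;
	return (CUBIC_BETA * cwnd) >> CUBIC_BETA_SCALE_SHIFT;
}

int main(void)
{
	uint64_t cwnd = 100 * 1252; /* hypothetical: 100 full-size packets */
	uint64_t last_w_max = 0;

	/* Two consecutive losses: the second hits below the previous
	 * maximum and therefore triggers fast convergence.
	 */
	for (int i = 0; i < 2; i++) {
		cwnd = on_loss(cwnd, &last_w_max);
		printf("loss %d: cwnd=%llu last_w_max=%llu\n", i + 1,
		       (unsigned long long)cwnd,
		       (unsigned long long)last_w_max);
	}
	return 0;
}
```

The first loss records the full window as last_w_max and shrinks cwnd to ~0.7 of its value; the second records only ~0.85 of the already reduced window, which is exactly the fast-convergence behavior the patch's comment announces.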