diff options
Diffstat (limited to 'src/network/tc')
44 files changed, 6595 insertions, 0 deletions
diff --git a/src/network/tc/cake.c b/src/network/tc/cake.c new file mode 100644 index 0000000..c495faf --- /dev/null +++ b/src/network/tc/cake.c @@ -0,0 +1,737 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "cake.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-table.h" +#include "string-util.h" + +static int cake_init(QDisc *qdisc) { + CommonApplicationsKeptEnhanced *c; + + assert(qdisc); + + c = CAKE(qdisc); + + c->autorate = -1; + c->compensation_mode = _CAKE_COMPENSATION_MODE_INVALID; + c->raw = -1; + c->flow_isolation_mode = _CAKE_FLOW_ISOLATION_MODE_INVALID; + c->nat = -1; + c->preset = _CAKE_PRESET_INVALID; + c->wash = -1; + c->split_gso = -1; + c->ack_filter = _CAKE_ACK_FILTER_INVALID; + + return 0; +} + +static int cake_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + CommonApplicationsKeptEnhanced *c; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(c = CAKE(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "cake"); + if (r < 0) + return r; + + if (c->bandwidth > 0) { + r = sd_netlink_message_append_u64(req, TCA_CAKE_BASE_RATE64, c->bandwidth); + if (r < 0) + return r; + } + + if (c->autorate >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_AUTORATE, c->autorate); + if (r < 0) + return r; + } + + if (c->overhead_set) { + r = sd_netlink_message_append_s32(req, TCA_CAKE_OVERHEAD, c->overhead); + if (r < 0) + return r; + } + + if (c->mpu > 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_MPU, c->mpu); + if (r < 0) + return r; + } + + if (c->compensation_mode >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_ATM, c->compensation_mode); + if (r < 0) + return r; + } + + if (c->raw > 0) { + /* TCA_CAKE_RAW attribute is mostly a flag, not boolean. */ + r = sd_netlink_message_append_u32(req, TCA_CAKE_RAW, 0); + if (r < 0) + return r; + } + + if (c->flow_isolation_mode >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_FLOW_MODE, c->flow_isolation_mode); + if (r < 0) + return r; + } + + if (c->nat >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_NAT, c->nat); + if (r < 0) + return r; + } + + if (c->preset >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_DIFFSERV_MODE, c->preset); + if (r < 0) + return r; + } + + if (c->fwmark > 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_FWMARK, c->fwmark); + if (r < 0) + return r; + } + + if (c->wash >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_WASH, c->wash); + if (r < 0) + return r; + } + + if (c->split_gso >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_SPLIT_GSO, c->split_gso); + if (r < 0) + return r; + } + + if (c->rtt > 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_RTT, c->rtt); + if (r < 0) + return r; + } + + if (c->ack_filter >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CAKE_ACK_FILTER, c->ack_filter); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_cake_bandwidth( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + uint64_t k; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->bandwidth = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_size(rvalue, 1000, &k); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->bandwidth = k/8; + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_cake_overhead( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + int32_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->overhead_set = false; + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atoi32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (v < -64 || v > 256) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->overhead = v; + c->overhead_set = true; + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_cake_mpu( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + uint32_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->mpu = 0; + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (v <= 0 || v > 256) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->mpu = v; + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_cake_tristate( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + int *dest, r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (streq(lvalue, "AutoRateIngress")) + dest = &c->autorate; + else if (streq(lvalue, "UseRawPacketSize")) + dest = &c->raw; + else if (streq(lvalue, "NAT")) + dest = &c->nat; + else if (streq(lvalue, "Wash")) + dest = &c->wash; + else if (streq(lvalue, "SplitGSO")) + dest = &c->split_gso; + else + assert_not_reached(); + + r = parse_tristate(rvalue, dest); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + return 0; +} + +static const char * const cake_compensation_mode_table[_CAKE_COMPENSATION_MODE_MAX] = { + [CAKE_COMPENSATION_MODE_NONE] = "none", + [CAKE_COMPENSATION_MODE_ATM] = "atm", + [CAKE_COMPENSATION_MODE_PTM] = "ptm", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(cake_compensation_mode, CakeCompensationMode); + +int config_parse_cake_compensation_mode( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + CakeCompensationMode mode; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->compensation_mode = _CAKE_COMPENSATION_MODE_INVALID; + TAKE_PTR(qdisc); + return 0; + } + + mode = cake_compensation_mode_from_string(rvalue); + if (mode < 0) { + log_syntax(unit, LOG_WARNING, filename, line, mode, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->compensation_mode = mode; + TAKE_PTR(qdisc); + return 0; +} + +static const char * const cake_flow_isolation_mode_table[_CAKE_FLOW_ISOLATION_MODE_MAX] = { + [CAKE_FLOW_ISOLATION_MODE_NONE] = "none", + [CAKE_FLOW_ISOLATION_MODE_SRC_IP] = "src-host", + [CAKE_FLOW_ISOLATION_MODE_DST_IP] = "dst-host", + [CAKE_FLOW_ISOLATION_MODE_HOSTS] = "hosts", + [CAKE_FLOW_ISOLATION_MODE_FLOWS] = "flows", + [CAKE_FLOW_ISOLATION_MODE_DUAL_SRC] = "dual-src-host", + [CAKE_FLOW_ISOLATION_MODE_DUAL_DST] = "dual-dst-host", + [CAKE_FLOW_ISOLATION_MODE_TRIPLE] = "triple", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(cake_flow_isolation_mode, CakeFlowIsolationMode); + +int config_parse_cake_flow_isolation_mode( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + CakeFlowIsolationMode mode; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->flow_isolation_mode = _CAKE_FLOW_ISOLATION_MODE_INVALID; + TAKE_PTR(qdisc); + return 0; + } + + mode = cake_flow_isolation_mode_from_string(rvalue); + if (mode < 0) { + log_syntax(unit, LOG_WARNING, filename, line, mode, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->flow_isolation_mode = mode; + TAKE_PTR(qdisc); + return 0; +} + +static const char * const cake_priority_queueing_preset_table[_CAKE_PRESET_MAX] = { + [CAKE_PRESET_DIFFSERV3] = "diffserv3", + [CAKE_PRESET_DIFFSERV4] = "diffserv4", + [CAKE_PRESET_DIFFSERV8] = "diffserv8", + [CAKE_PRESET_BESTEFFORT] = "besteffort", + [CAKE_PRESET_PRECEDENCE] = "precedence", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(cake_priority_queueing_preset, CakePriorityQueueingPreset); + +int config_parse_cake_priority_queueing_preset( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + CakePriorityQueueingPreset preset; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->preset = _CAKE_PRESET_INVALID; + TAKE_PTR(qdisc); + return 0; + } + + preset = cake_priority_queueing_preset_from_string(rvalue); + if (preset < 0) { + log_syntax(unit, LOG_WARNING, filename, line, preset, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->preset = preset; + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_cake_fwmark( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + uint32_t fwmark; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->fwmark = 0; + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &fwmark); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (fwmark <= 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->fwmark = fwmark; + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_cake_rtt( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + Network *network = ASSERT_PTR(data); + usec_t t; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->rtt = 0; + TAKE_PTR(qdisc); + return 0; + } + + r = parse_sec(rvalue, &t); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (t <= 0 || t > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->rtt = t; + TAKE_PTR(qdisc); + return 0; +} + +static const char * const cake_ack_filter_table[_CAKE_ACK_FILTER_MAX] = { + [CAKE_ACK_FILTER_NO] = "no", + [CAKE_ACK_FILTER_YES] = "yes", + [CAKE_ACK_FILTER_AGGRESSIVE] = "aggressive", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(cake_ack_filter, CakeAckFilter, CAKE_ACK_FILTER_YES); + +int config_parse_cake_ack_filter( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + CommonApplicationsKeptEnhanced *c; + CakeAckFilter ack_filter; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + c = CAKE(qdisc); + + if (isempty(rvalue)) { + c->ack_filter = _CAKE_ACK_FILTER_INVALID; + TAKE_PTR(qdisc); + return 0; + } + + ack_filter = cake_ack_filter_from_string(rvalue); + if (ack_filter < 0) { + log_syntax(unit, LOG_WARNING, filename, line, ack_filter, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + c->ack_filter = ack_filter; + TAKE_PTR(qdisc); + return 0; +} + +const QDiscVTable cake_vtable = { + .object_size = sizeof(CommonApplicationsKeptEnhanced), + .tca_kind = "cake", + .init = cake_init, + .fill_message = cake_fill_message, +}; diff --git a/src/network/tc/cake.h b/src/network/tc/cake.h new file mode 100644 index 0000000..5ca6dc6 --- /dev/null +++ b/src/network/tc/cake.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include <linux/pkt_sched.h> + +#include "conf-parser.h" +#include "qdisc.h" + +typedef enum CakeCompensationMode { + CAKE_COMPENSATION_MODE_NONE = CAKE_ATM_NONE, + CAKE_COMPENSATION_MODE_ATM = CAKE_ATM_ATM, + CAKE_COMPENSATION_MODE_PTM = CAKE_ATM_PTM, + _CAKE_COMPENSATION_MODE_MAX, + _CAKE_COMPENSATION_MODE_INVALID = -EINVAL, +} CakeCompensationMode; + +typedef enum CakeFlowIsolationMode { + CAKE_FLOW_ISOLATION_MODE_NONE = CAKE_FLOW_NONE, + CAKE_FLOW_ISOLATION_MODE_SRC_IP = CAKE_FLOW_SRC_IP, + CAKE_FLOW_ISOLATION_MODE_DST_IP = CAKE_FLOW_DST_IP, + CAKE_FLOW_ISOLATION_MODE_HOSTS = CAKE_FLOW_HOSTS, + CAKE_FLOW_ISOLATION_MODE_FLOWS = CAKE_FLOW_FLOWS, + CAKE_FLOW_ISOLATION_MODE_DUAL_SRC = CAKE_FLOW_DUAL_SRC, + CAKE_FLOW_ISOLATION_MODE_DUAL_DST = CAKE_FLOW_DUAL_DST, + CAKE_FLOW_ISOLATION_MODE_TRIPLE = CAKE_FLOW_TRIPLE, + _CAKE_FLOW_ISOLATION_MODE_MAX, + _CAKE_FLOW_ISOLATION_MODE_INVALID = -EINVAL, +} CakeFlowIsolationMode; + +typedef enum CakePriorityQueueingPreset { + CAKE_PRESET_DIFFSERV3 = CAKE_DIFFSERV_DIFFSERV3, + CAKE_PRESET_DIFFSERV4 = CAKE_DIFFSERV_DIFFSERV4, + CAKE_PRESET_DIFFSERV8 = CAKE_DIFFSERV_DIFFSERV8, + CAKE_PRESET_BESTEFFORT = CAKE_DIFFSERV_BESTEFFORT, + CAKE_PRESET_PRECEDENCE = CAKE_DIFFSERV_PRECEDENCE, + _CAKE_PRESET_MAX, + _CAKE_PRESET_INVALID = -EINVAL, +} CakePriorityQueueingPreset; + +typedef enum CakeAckFilter { + CAKE_ACK_FILTER_NO = CAKE_ACK_NONE, + CAKE_ACK_FILTER_YES = CAKE_ACK_FILTER, + CAKE_ACK_FILTER_AGGRESSIVE = CAKE_ACK_AGGRESSIVE, + _CAKE_ACK_FILTER_MAX, + _CAKE_ACK_FILTER_INVALID = -EINVAL, +} CakeAckFilter; + +typedef struct CommonApplicationsKeptEnhanced { + QDisc meta; + + /* Shaper parameters */ + int autorate; + uint64_t bandwidth; + + /* Overhead compensation parameters */ + bool overhead_set; + int overhead; + uint32_t mpu; + CakeCompensationMode compensation_mode; + int raw; + + /* Flow isolation parameters */ + CakeFlowIsolationMode flow_isolation_mode; + int nat; + + /* Priority queue parameters */ + CakePriorityQueueingPreset preset; + uint32_t fwmark; + + /* Other parameters */ + int wash; + int split_gso; + usec_t rtt; + CakeAckFilter ack_filter; +} CommonApplicationsKeptEnhanced; + +DEFINE_QDISC_CAST(CAKE, CommonApplicationsKeptEnhanced); +extern const QDiscVTable cake_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_cake_bandwidth); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_overhead); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_mpu); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_tristate); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_compensation_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_flow_isolation_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_priority_queueing_preset); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_fwmark); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_rtt); +CONFIG_PARSER_PROTOTYPE(config_parse_cake_ack_filter); diff --git a/src/network/tc/codel.c b/src/network/tc/codel.c new file mode 100644 index 0000000..e212523 --- /dev/null +++ b/src/network/tc/codel.c @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-util.h" + +static int controlled_delay_init(QDisc *qdisc) { + ControlledDelay *cd; + + assert(qdisc); + + cd = CODEL(qdisc); + + cd->ce_threshold_usec = USEC_INFINITY; + cd->ecn = -1; + + return 0; +} + +static int controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + ControlledDelay *cd; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(cd = CODEL(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "codel"); + if (r < 0) + return r; + + if (cd->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_CODEL_LIMIT, cd->packet_limit); + if (r < 0) + return r; + } + + if (cd->interval_usec > 0) { + r = sd_netlink_message_append_u32(req, TCA_CODEL_INTERVAL, cd->interval_usec); + if (r < 0) + return r; + } + + if (cd->target_usec > 0) { + r = sd_netlink_message_append_u32(req, TCA_CODEL_TARGET, cd->target_usec); + if (r < 0) + return r; + } + + if (cd->ecn >= 0) { + r = sd_netlink_message_append_u32(req, TCA_CODEL_ECN, cd->ecn); + if (r < 0) + return r; + } + + if (cd->ce_threshold_usec != USEC_INFINITY) { + r = sd_netlink_message_append_u32(req, TCA_CODEL_CE_THRESHOLD, cd->ce_threshold_usec); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_controlled_delay_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + ControlledDelay *cd; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + cd = CODEL(qdisc); + + if (isempty(rvalue)) { + cd->packet_limit = 0; + + qdisc = NULL; + return 0; + } + + r = safe_atou32(rvalue, &cd->packet_limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + qdisc = NULL; + + return 0; +} + +int config_parse_controlled_delay_usec( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + ControlledDelay *cd; + Network *network = ASSERT_PTR(data); + usec_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + cd = CODEL(qdisc); + + if (streq(lvalue, "TargetSec")) + p = &cd->target_usec; + else if (streq(lvalue, "IntervalSec")) + p = &cd->interval_usec; + else if (streq(lvalue, "CEThresholdSec")) + p = &cd->ce_threshold_usec; + else + assert_not_reached(); + + if (isempty(rvalue)) { + if (streq(lvalue, "CEThresholdSec")) + *p = USEC_INFINITY; + else + *p = 0; + + qdisc = NULL; + return 0; + } + + r = parse_sec(rvalue, p); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + qdisc = NULL; + + return 0; +} + +int config_parse_controlled_delay_bool( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + ControlledDelay *cd; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + cd = CODEL(qdisc); + + r = parse_tristate(rvalue, &cd->ecn); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable codel_vtable = { + .object_size = sizeof(ControlledDelay), + .tca_kind = "codel", + .init = controlled_delay_init, + .fill_message = controlled_delay_fill_message, +}; diff --git a/src/network/tc/codel.h b/src/network/tc/codel.h new file mode 100644 index 0000000..4fe5283 --- /dev/null +++ b/src/network/tc/codel.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "time-util.h" + +typedef struct ControlledDelay { + QDisc meta; + + uint32_t packet_limit; + usec_t interval_usec; + usec_t target_usec; + usec_t ce_threshold_usec; + int ecn; +} ControlledDelay; + +DEFINE_QDISC_CAST(CODEL, ControlledDelay); +extern const QDiscVTable codel_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_u32); +CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_usec); +CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_bool); diff --git a/src/network/tc/drr.c b/src/network/tc/drr.c new file mode 100644 index 0000000..373911b --- /dev/null +++ b/src/network/tc/drr.c @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "drr.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" + +const QDiscVTable drr_vtable = { + .object_size = sizeof(DeficitRoundRobinScheduler), + .tca_kind = "drr", +}; + +static int drr_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) { + DeficitRoundRobinSchedulerClass *drr; + int r; + + assert(link); + assert(tclass); + assert(req); + + assert_se(drr = TCLASS_TO_DRR(tclass)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "drr"); + if (r < 0) + return r; + + if (drr->quantum > 0) { + r = sd_netlink_message_append_u32(req, TCA_DRR_QUANTUM, drr->quantum); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_drr_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + DeficitRoundRobinSchedulerClass *drr; + Network *network = ASSERT_PTR(data); + uint64_t u; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_DRR, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + drr = TCLASS_TO_DRR(tclass); + + if (isempty(rvalue)) { + drr->quantum = 0; + + TAKE_PTR(tclass); + return 0; + } + + r = parse_size(rvalue, 1024, &u); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (u > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + drr->quantum = (uint32_t) u; + + TAKE_PTR(tclass); + return 0; +} + +const TClassVTable drr_tclass_vtable = { + .object_size = sizeof(DeficitRoundRobinSchedulerClass), + .tca_kind = "drr", + .fill_message = drr_class_fill_message, +}; diff --git a/src/network/tc/drr.h b/src/network/tc/drr.h new file mode 100644 index 0000000..c96cc4d --- /dev/null +++ b/src/network/tc/drr.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "qdisc.h" + +typedef struct DeficitRoundRobinScheduler { + QDisc meta; +} DeficitRoundRobinScheduler; + +DEFINE_QDISC_CAST(DRR, DeficitRoundRobinScheduler); +extern const QDiscVTable drr_vtable; + +typedef struct DeficitRoundRobinSchedulerClass { + TClass meta; + + uint32_t quantum; +} DeficitRoundRobinSchedulerClass; + +DEFINE_TCLASS_CAST(DRR, DeficitRoundRobinSchedulerClass); +extern const TClassVTable drr_tclass_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_drr_size); diff --git a/src/network/tc/ets.c b/src/network/tc/ets.c new file mode 100644 index 0000000..730b0a1 --- /dev/null +++ b/src/network/tc/ets.c @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "ets.h" +#include "extract-word.h" +#include "memory-util.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-util.h" +#include "tc-util.h" + +static int enhanced_transmission_selection_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + EnhancedTransmissionSelection *ets; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(ets = ETS(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "ets"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u8(req, TCA_ETS_NBANDS, ets->n_bands); + if (r < 0) + return r; + + if (ets->n_strict > 0) { + r = sd_netlink_message_append_u8(req, TCA_ETS_NSTRICT, ets->n_strict); + if (r < 0) + return r; + } + + if (ets->n_quanta > 0) { + r = sd_netlink_message_open_container(req, TCA_ETS_QUANTA); + if (r < 0) + return r; + + for (unsigned i = 0; i < ets->n_quanta; i++) { + r = sd_netlink_message_append_u32(req, TCA_ETS_QUANTA_BAND, ets->quanta[i]); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + } + + if (ets->n_prio > 0) { + r = sd_netlink_message_open_container(req, TCA_ETS_PRIOMAP); + if (r < 0) + return r; + + for (unsigned i = 0; i < ets->n_prio; i++) { + r = sd_netlink_message_append_u8(req, TCA_ETS_PRIOMAP_BAND, ets->prio[i]); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_ets_u8( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + EnhancedTransmissionSelection *ets; + Network *network = ASSERT_PTR(data); + uint8_t v, *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ets = ETS(qdisc); + if (streq(lvalue, "Bands")) + p = &ets->n_bands; + else if (streq(lvalue, "StrictBands")) + p = &ets->n_strict; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + qdisc = NULL; + return 0; + } + + r = safe_atou8(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (v > TCQ_ETS_MAX_BANDS) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s='. The value must be <= %d, ignoring assignment: %s", + lvalue, TCQ_ETS_MAX_BANDS, rvalue); + return 0; + } + + *p = v; + qdisc = NULL; + + return 0; +} + +int config_parse_ets_quanta( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + EnhancedTransmissionSelection *ets; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ets = ETS(qdisc); + + if (isempty(rvalue)) { + memzero(ets->quanta, sizeof(uint32_t) * TCQ_ETS_MAX_BANDS); + ets->n_quanta = 0; + + qdisc = NULL; + return 0; + } + + for (const char *p = rvalue;;) { + _cleanup_free_ char *word = NULL; + uint64_t v; + + r = extract_first_word(&p, &word, NULL, 0); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to extract next value, ignoring: %m"); + break; + } + if (r == 0) + break; + + r = parse_size(word, 1024, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, word); + continue; + } + if (v == 0 || v > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, word); + continue; + } + if (ets->n_quanta >= TCQ_ETS_MAX_BANDS) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Too many quanta in '%s=', ignoring assignment: %s", + lvalue, word); + continue; + } + + ets->quanta[ets->n_quanta++] = v; + } + + qdisc = NULL; + + return 0; +} + +int config_parse_ets_prio( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + EnhancedTransmissionSelection *ets; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ets = ETS(qdisc); + + if (isempty(rvalue)) { + memzero(ets->prio, sizeof(uint8_t) * (TC_PRIO_MAX + 1)); + ets->n_prio = 0; + + qdisc = NULL; + return 0; + } + + for (const char *p = rvalue;;) { + _cleanup_free_ char *word = NULL; + uint8_t v; + + r = extract_first_word(&p, &word, NULL, 0); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to extract next value, ignoring: %m"); + break; + } + if (r == 0) + break; + + r = safe_atou8(word, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, word); + continue; + } + if (ets->n_prio > TC_PRIO_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Too many priomap in '%s=', ignoring assignment: %s", + lvalue, word); + continue; + } + + ets->prio[ets->n_prio++] = v; + } + + qdisc = NULL; + + return 0; +} + +static int enhanced_transmission_selection_verify(QDisc *qdisc) { + EnhancedTransmissionSelection *ets; + + assert(qdisc); + + ets = ETS(qdisc); + + if (ets->n_bands == 0) + ets->n_bands = ets->n_strict + ets->n_quanta; + + if (ets->n_bands == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: At least one of Band=, Strict=, or Quanta= must be specified. " + "Ignoring [EnhancedTransmissionSelection] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + if (ets->n_bands < ets->n_strict + ets->n_quanta) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Not enough total bands to cover all the strict bands and quanta. " + "Ignoring [EnhancedTransmissionSelection] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + for (unsigned i = 0; i < ets->n_prio; i++) + if (ets->prio[i] >= ets->n_bands) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: PriorityMap= element is out of bands. " + "Ignoring [EnhancedTransmissionSelection] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + return 0; +} + +const QDiscVTable ets_vtable = { + .object_size = sizeof(EnhancedTransmissionSelection), + .tca_kind = "ets", + .fill_message = enhanced_transmission_selection_fill_message, + .verify = enhanced_transmission_selection_verify, +}; diff --git a/src/network/tc/ets.h b/src/network/tc/ets.h new file mode 100644 index 0000000..b6dd428 --- /dev/null +++ b/src/network/tc/ets.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/pkt_sched.h> + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct EnhancedTransmissionSelection { + QDisc meta; + + uint8_t n_bands; + uint8_t n_strict; + unsigned n_quanta; + uint32_t quanta[TCQ_ETS_MAX_BANDS]; + unsigned n_prio; + uint8_t prio[TC_PRIO_MAX + 1]; +} EnhancedTransmissionSelection; + +DEFINE_QDISC_CAST(ETS, EnhancedTransmissionSelection); +extern const QDiscVTable ets_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_ets_u8); +CONFIG_PARSER_PROTOTYPE(config_parse_ets_quanta); +CONFIG_PARSER_PROTOTYPE(config_parse_ets_prio); diff --git a/src/network/tc/fifo.c b/src/network/tc/fifo.c new file mode 100644 index 0000000..940fa00 --- /dev/null +++ b/src/network/tc/fifo.c @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "fifo.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" + +static int fifo_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + FirstInFirstOut *fifo; + int r; + + assert(link); + assert(qdisc); + assert(req); + + switch (qdisc->kind) { + case QDISC_KIND_PFIFO: + assert_se(fifo = PFIFO(qdisc)); + break; + case QDISC_KIND_BFIFO: + assert_se(fifo = BFIFO(qdisc)); + break; + case QDISC_KIND_PFIFO_HEAD_DROP: + assert_se(fifo = PFIFO_HEAD_DROP(qdisc)); + break; + default: + assert_not_reached(); + } + + const struct tc_fifo_qopt opt = { .limit = fifo->limit }; + r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(opt)); + if (r < 0) + return r; + + return 0; +} + +int config_parse_pfifo_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + FirstInFirstOut *fifo; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(ltype, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + switch (qdisc->kind) { + case QDISC_KIND_PFIFO: + fifo = PFIFO(qdisc); + break; + case QDISC_KIND_PFIFO_HEAD_DROP: + fifo = PFIFO_HEAD_DROP(qdisc); + break; + default: + assert_not_reached(); + } + + if (isempty(rvalue)) { + fifo->limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &fifo->limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_bfifo_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + FirstInFirstOut *fifo; + uint64_t u; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_BFIFO, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fifo = BFIFO(qdisc); + + if (isempty(rvalue)) { + fifo->limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_size(rvalue, 1024, &u); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (u > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + fifo->limit = (uint32_t) u; + + TAKE_PTR(qdisc); + return 0; +} + +const QDiscVTable pfifo_vtable = { + .object_size = sizeof(FirstInFirstOut), + .tca_kind = "pfifo", + .fill_message = fifo_fill_message, +}; + +const QDiscVTable bfifo_vtable = { + .object_size = sizeof(FirstInFirstOut), + .tca_kind = "bfifo", + .fill_message = fifo_fill_message, +}; + +const QDiscVTable pfifo_head_drop_vtable = { + .object_size = sizeof(FirstInFirstOut), + .tca_kind = "pfifo_head_drop", + .fill_message = fifo_fill_message, +}; + +const QDiscVTable pfifo_fast_vtable = { + .object_size = sizeof(FirstInFirstOut), + .tca_kind = "pfifo_fast", +}; diff --git a/src/network/tc/fifo.h b/src/network/tc/fifo.h new file mode 100644 index 0000000..b9bbd09 --- /dev/null +++ b/src/network/tc/fifo.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct FirstInFirstOut { + QDisc meta; + + uint32_t limit; +} FirstInFirstOut; + +DEFINE_QDISC_CAST(PFIFO, FirstInFirstOut); +DEFINE_QDISC_CAST(BFIFO, FirstInFirstOut); +DEFINE_QDISC_CAST(PFIFO_HEAD_DROP, FirstInFirstOut); +DEFINE_QDISC_CAST(PFIFO_FAST, FirstInFirstOut); + +extern const QDiscVTable pfifo_vtable; +extern const QDiscVTable bfifo_vtable; +extern const QDiscVTable pfifo_head_drop_vtable; +extern const QDiscVTable pfifo_fast_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_pfifo_size); +CONFIG_PARSER_PROTOTYPE(config_parse_bfifo_size); diff --git a/src/network/tc/fq-codel.c b/src/network/tc/fq-codel.c new file mode 100644 index 0000000..124faf7 --- /dev/null +++ b/src/network/tc/fq-codel.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-util.h" +#include "strv.h" + +static int fair_queueing_controlled_delay_init(QDisc *qdisc) { + FairQueueingControlledDelay *fqcd; + + assert(qdisc); + + fqcd = FQ_CODEL(qdisc); + + fqcd->memory_limit = UINT32_MAX; + fqcd->ce_threshold_usec = USEC_INFINITY; + fqcd->ecn = -1; + + return 0; +} + +static int fair_queueing_controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + FairQueueingControlledDelay *fqcd; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(fqcd = FQ_CODEL(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq_codel"); + if (r < 0) + return r; + + if (fqcd->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_LIMIT, fqcd->packet_limit); + if (r < 0) + return r; + } + + if (fqcd->flows > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_FLOWS, fqcd->flows); + if (r < 0) + return r; + } + + if (fqcd->quantum > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_QUANTUM, fqcd->quantum); + if (r < 0) + return r; + } + + if (fqcd->interval_usec > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_INTERVAL, fqcd->interval_usec); + if (r < 0) + return r; + } + + if (fqcd->target_usec > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_TARGET, fqcd->target_usec); + if (r < 0) + return r; + } + + if (fqcd->ecn >= 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_ECN, fqcd->ecn); + if (r < 0) + return r; + } + + if (fqcd->ce_threshold_usec != USEC_INFINITY) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_CE_THRESHOLD, fqcd->ce_threshold_usec); + if (r < 0) + return r; + } + + if (fqcd->memory_limit != UINT32_MAX) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_MEMORY_LIMIT, fqcd->memory_limit); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_fair_queueing_controlled_delay_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueingControlledDelay *fqcd; + Network *network = ASSERT_PTR(data); + uint32_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fqcd = FQ_CODEL(qdisc); + + if (streq(lvalue, "PacketLimit")) + p = &fqcd->packet_limit; + else if (streq(lvalue, "Flows")) + p = &fqcd->flows; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, p); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_fair_queueing_controlled_delay_usec( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueingControlledDelay *fqcd; + Network *network = ASSERT_PTR(data); + usec_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fqcd = FQ_CODEL(qdisc); + + if (streq(lvalue, "TargetSec")) + p = &fqcd->target_usec; + else if (streq(lvalue, "IntervalSec")) + p = &fqcd->interval_usec; + else if (streq(lvalue, "CEThresholdSec")) + p = &fqcd->ce_threshold_usec; + else + assert_not_reached(); + + if (isempty(rvalue)) { + if (streq(lvalue, "CEThresholdSec")) + *p = USEC_INFINITY; + else + *p = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_sec(rvalue, p); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_fair_queueing_controlled_delay_bool( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueingControlledDelay *fqcd; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fqcd = FQ_CODEL(qdisc); + + r = parse_tristate(rvalue, &fqcd->ecn); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_fair_queueing_controlled_delay_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueingControlledDelay *fqcd; + Network *network = ASSERT_PTR(data); + uint64_t sz; + uint32_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fqcd = FQ_CODEL(qdisc); + + if (STR_IN_SET(lvalue, "MemoryLimitBytes", "MemoryLimit")) + p = &fqcd->memory_limit; + else if (STR_IN_SET(lvalue, "QuantumBytes", "Quantum")) + p = &fqcd->quantum; + else + assert_not_reached(); + + if (isempty(rvalue)) { + if (STR_IN_SET(lvalue, "MemoryLimitBytes", "MemoryLimit")) + *p = UINT32_MAX; + else + *p = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_size(rvalue, 1024, &sz); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (sz >= UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Specified '%s=' is too large, ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + *p = sz; + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable fq_codel_vtable = { + .object_size = sizeof(FairQueueingControlledDelay), + .tca_kind = "fq_codel", + .init = fair_queueing_controlled_delay_init, + .fill_message = fair_queueing_controlled_delay_fill_message, +}; diff --git a/src/network/tc/fq-codel.h b/src/network/tc/fq-codel.h new file mode 100644 index 0000000..2553c59 --- /dev/null +++ b/src/network/tc/fq-codel.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "time-util.h" + +typedef struct FairQueueingControlledDelay { + QDisc meta; + + uint32_t packet_limit; + uint32_t flows; + uint32_t quantum; + uint32_t memory_limit; + usec_t target_usec; + usec_t interval_usec; + usec_t ce_threshold_usec; + int ecn; +} FairQueueingControlledDelay; + +DEFINE_QDISC_CAST(FQ_CODEL, FairQueueingControlledDelay); +extern const QDiscVTable fq_codel_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_u32); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_usec); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_bool); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_size); diff --git a/src/network/tc/fq-pie.c b/src/network/tc/fq-pie.c new file mode 100644 index 0000000..c8b2e7b --- /dev/null +++ b/src/network/tc/fq-pie.c @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "fq-pie.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" + +static int fq_pie_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + FlowQueuePIE *fq_pie; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(fq_pie = FQ_PIE(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq_pie"); + if (r < 0) + return r; + + if (fq_pie->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_PIE_LIMIT, fq_pie->packet_limit); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_fq_pie_packet_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FlowQueuePIE *fq_pie; + Network *network = ASSERT_PTR(data); + uint32_t val; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ_PIE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + + fq_pie = FQ_PIE(qdisc); + + if (isempty(rvalue)) { + fq_pie->packet_limit = 0; + + qdisc = NULL; + return 0; + } + + r = safe_atou32(rvalue, &val); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (val == 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + fq_pie->packet_limit = val; + qdisc = NULL; + + return 0; +} + +const QDiscVTable fq_pie_vtable = { + .object_size = sizeof(FlowQueuePIE), + .tca_kind = "fq_pie", + .fill_message = fq_pie_fill_message, +}; diff --git a/src/network/tc/fq-pie.h b/src/network/tc/fq-pie.h new file mode 100644 index 0000000..51fb626 --- /dev/null +++ b/src/network/tc/fq-pie.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct FlowQueuePIE { + QDisc meta; + + uint32_t packet_limit; +} FlowQueuePIE; + +DEFINE_QDISC_CAST(FQ_PIE, FlowQueuePIE); +extern const QDiscVTable fq_pie_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_fq_pie_packet_limit); diff --git a/src/network/tc/fq.c b/src/network/tc/fq.c new file mode 100644 index 0000000..74785c9 --- /dev/null +++ b/src/network/tc/fq.c @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "fq.h" +#include "logarithm.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" +#include "strv.h" + +static int fair_queueing_init(QDisc *qdisc) { + FairQueueing *fq; + + assert(qdisc); + + fq = FQ(qdisc); + + fq->pacing = -1; + fq->ce_threshold_usec = USEC_INFINITY; + + return 0; +} + +static int fair_queueing_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + FairQueueing *fq; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(fq = FQ(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq"); + if (r < 0) + return r; + + if (fq->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_PLIMIT, fq->packet_limit); + if (r < 0) + return r; + } + + if (fq->flow_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_FLOW_PLIMIT, fq->flow_limit); + if (r < 0) + return r; + } + + if (fq->quantum > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_QUANTUM, fq->quantum); + if (r < 0) + return r; + } + + if (fq->initial_quantum > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_INITIAL_QUANTUM, fq->initial_quantum); + if (r < 0) + return r; + } + + if (fq->pacing >= 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_RATE_ENABLE, fq->pacing); + if (r < 0) + return r; + } + + if (fq->max_rate > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_FLOW_MAX_RATE, fq->max_rate); + if (r < 0) + return r; + } + + if (fq->buckets > 0) { + uint32_t l; + + l = log2u(fq->buckets); + r = sd_netlink_message_append_u32(req, TCA_FQ_BUCKETS_LOG, l); + if (r < 0) + return r; + } + + if (fq->orphan_mask > 0) { + r = sd_netlink_message_append_u32(req, TCA_FQ_ORPHAN_MASK, fq->orphan_mask); + if (r < 0) + return r; + } + + if (fq->ce_threshold_usec != USEC_INFINITY) { + r = sd_netlink_message_append_u32(req, TCA_FQ_CE_THRESHOLD, fq->ce_threshold_usec); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_fair_queueing_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueing *fq; + Network *network = ASSERT_PTR(data); + uint32_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fq = FQ(qdisc); + + if (streq(lvalue, "PacketLimit")) + p = &fq->packet_limit; + else if (streq(lvalue, "FlowLimit")) + p = &fq->flow_limit; + else if (streq(lvalue, "Buckets")) + p = &fq->buckets; + else if (streq(lvalue, "OrphanMask")) + p = &fq->orphan_mask; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + qdisc = NULL; + return 0; + } + + r = safe_atou32(rvalue, p); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + qdisc = NULL; + + return 0; +} + +int config_parse_fair_queueing_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueing *fq; + Network *network = ASSERT_PTR(data); + uint64_t sz; + uint32_t *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fq = FQ(qdisc); + + if (STR_IN_SET(lvalue, "QuantumBytes", "Quantum")) + p = &fq->quantum; + else if (STR_IN_SET(lvalue, "InitialQuantumBytes", "InitialQuantum")) + p = &fq->initial_quantum; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + qdisc = NULL; + return 0; + } + + r = parse_size(rvalue, 1024, &sz); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (sz > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Specified '%s=' is too large, ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + *p = sz; + qdisc = NULL; + + return 0; +} + +int config_parse_fair_queueing_bool( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueing *fq; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fq = FQ(qdisc); + + r = parse_tristate(rvalue, &fq->pacing); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + fq->pacing = r; + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_fair_queueing_usec( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueing *fq; + Network *network = ASSERT_PTR(data); + usec_t sec; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fq = FQ(qdisc); + + if (isempty(rvalue)) { + fq->ce_threshold_usec = USEC_INFINITY; + + qdisc = NULL; + return 0; + } + + r = parse_sec(rvalue, &sec); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (sec > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Specified '%s=' is too large, ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + fq->ce_threshold_usec = sec; + qdisc = NULL; + + return 0; +} + +int config_parse_fair_queueing_max_rate( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + FairQueueing *fq; + Network *network = ASSERT_PTR(data); + uint64_t sz; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + fq = FQ(qdisc); + + if (isempty(rvalue)) { + fq->max_rate = 0; + + qdisc = NULL; + return 0; + } + + r = parse_size(rvalue, 1000, &sz); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (sz / 8 > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Specified '%s=' is too large, ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + fq->max_rate = sz / 8; + qdisc = NULL; + + return 0; +} + +const QDiscVTable fq_vtable = { + .init = fair_queueing_init, + .object_size = sizeof(FairQueueing), + .tca_kind = "fq", + .fill_message = fair_queueing_fill_message, +}; diff --git a/src/network/tc/fq.h b/src/network/tc/fq.h new file mode 100644 index 0000000..77469c4 --- /dev/null +++ b/src/network/tc/fq.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct FairQueueing { + QDisc meta; + + uint32_t packet_limit; + uint32_t flow_limit; + uint32_t quantum; + uint32_t initial_quantum; + uint32_t max_rate; + uint32_t buckets; + uint32_t orphan_mask; + int pacing; + usec_t ce_threshold_usec; +} FairQueueing; + +DEFINE_QDISC_CAST(FQ, FairQueueing); +extern const QDiscVTable fq_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_u32); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_size); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_bool); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_usec); +CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_max_rate); diff --git a/src/network/tc/gred.c b/src/network/tc/gred.c new file mode 100644 index 0000000..2efb02c --- /dev/null +++ b/src/network/tc/gred.c @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-util.h" + +static int generic_random_early_detection_init(QDisc *qdisc) { + GenericRandomEarlyDetection *gred; + + assert(qdisc); + + gred = GRED(qdisc); + + gred->grio = -1; + + return 0; +} + +static int generic_random_early_detection_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + GenericRandomEarlyDetection *gred; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(gred = GRED(qdisc)); + + const struct tc_gred_sopt opt = { + .DPs = gred->virtual_queues, + .def_DP = gred->default_virtual_queue, + .grio = gred->grio, + }; + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "gred"); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_GRED_DPS, &opt, sizeof(opt)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +static int generic_random_early_detection_verify(QDisc *qdisc) { + GenericRandomEarlyDetection *gred = GRED(qdisc); + + if (gred->default_virtual_queue >= gred->virtual_queues) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: DefaultVirtualQueue= must be less than VirtualQueues=. " + "Ignoring [GenericRandomEarlyDetection] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + return 0; +} + +int config_parse_generic_random_early_detection_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + GenericRandomEarlyDetection *gred; + Network *network = ASSERT_PTR(data); + uint32_t *p; + uint32_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_GRED, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + gred = GRED(qdisc); + + if (streq(lvalue, "VirtualQueues")) + p = &gred->virtual_queues; + else if (streq(lvalue, "DefaultVirtualQueue")) + p = &gred->default_virtual_queue; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (v > MAX_DPs) + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + + *p = v; + TAKE_PTR(qdisc); + + return 0; +} +int config_parse_generic_random_early_detection_bool( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + GenericRandomEarlyDetection *gred; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_GRED, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + gred = GRED(qdisc); + + r = parse_tristate(rvalue, &gred->grio); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable gred_vtable = { + .object_size = sizeof(GenericRandomEarlyDetection), + .tca_kind = "gred", + .init = generic_random_early_detection_init, + .fill_message = generic_random_early_detection_fill_message, + .verify = generic_random_early_detection_verify, +}; diff --git a/src/network/tc/gred.h b/src/network/tc/gred.h new file mode 100644 index 0000000..c084ff1 --- /dev/null +++ b/src/network/tc/gred.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct GenericRandomEarlyDetection { + QDisc meta; + + uint32_t virtual_queues; + uint32_t default_virtual_queue; + int grio; +} GenericRandomEarlyDetection; + +DEFINE_QDISC_CAST(GRED, GenericRandomEarlyDetection); +extern const QDiscVTable gred_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_generic_random_early_detection_u32); +CONFIG_PARSER_PROTOTYPE(config_parse_generic_random_early_detection_bool); diff --git a/src/network/tc/hhf.c b/src/network/tc/hhf.c new file mode 100644 index 0000000..d44522f --- /dev/null +++ b/src/network/tc/hhf.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "hhf.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" + +static int heavy_hitter_filter_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + HeavyHitterFilter *hhf; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(hhf = HHF(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "hhf"); + if (r < 0) + return r; + + if (hhf->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_HHF_BACKLOG_LIMIT, hhf->packet_limit); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_heavy_hitter_filter_packet_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + HeavyHitterFilter *hhf; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_HHF, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + hhf = HHF(qdisc); + + if (isempty(rvalue)) { + hhf->packet_limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &hhf->packet_limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable hhf_vtable = { + .object_size = sizeof(HeavyHitterFilter), + .tca_kind = "hhf", + .fill_message = heavy_hitter_filter_fill_message, +}; diff --git a/src/network/tc/hhf.h b/src/network/tc/hhf.h new file mode 100644 index 0000000..04caaa8 --- /dev/null +++ b/src/network/tc/hhf.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct HeavyHitterFilter { + QDisc meta; + + uint32_t packet_limit; +} HeavyHitterFilter; + +DEFINE_QDISC_CAST(HHF, HeavyHitterFilter); +extern const QDiscVTable hhf_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_heavy_hitter_filter_packet_limit); diff --git a/src/network/tc/htb.c b/src/network/tc/htb.c new file mode 100644 index 0000000..eb2c8cf --- /dev/null +++ b/src/network/tc/htb.c @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "networkd-link.h" +#include "parse-util.h" +#include "qdisc.h" +#include "htb.h" +#include "string-util.h" +#include "tc-util.h" + +#define HTB_DEFAULT_RATE_TO_QUANTUM 10 +#define HTB_DEFAULT_MTU 1600 /* Ethernet packet length */ + +static int hierarchy_token_bucket_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + HierarchyTokenBucket *htb; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(htb = HTB(qdisc)); + + struct tc_htb_glob opt = { + .version = 3, + .rate2quantum = htb->rate_to_quantum, + .defcls = htb->default_class, + }; + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "htb"); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_HTB_INIT, &opt, sizeof(opt)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + return 0; +} + +int config_parse_hierarchy_token_bucket_default_class( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + HierarchyTokenBucket *htb; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_HTB, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + htb = HTB(qdisc); + + if (isempty(rvalue)) { + htb->default_class = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32_full(rvalue, 16, &htb->default_class); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_hierarchy_token_bucket_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + HierarchyTokenBucket *htb; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_HTB, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + htb = HTB(qdisc); + + if (isempty(rvalue)) { + htb->rate_to_quantum = HTB_DEFAULT_RATE_TO_QUANTUM; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &htb->rate_to_quantum); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +static int hierarchy_token_bucket_init(QDisc *qdisc) { + HierarchyTokenBucket *htb; + + assert(qdisc); + + htb = HTB(qdisc); + + htb->rate_to_quantum = HTB_DEFAULT_RATE_TO_QUANTUM; + + return 0; +} + +const QDiscVTable htb_vtable = { + .object_size = sizeof(HierarchyTokenBucket), + .tca_kind = "htb", + .fill_message = hierarchy_token_bucket_fill_message, + .init = hierarchy_token_bucket_init, +}; + +static int hierarchy_token_bucket_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) { + HierarchyTokenBucketClass *htb; + uint32_t rtab[256], ctab[256]; + int r; + + assert(link); + assert(tclass); + assert(req); + + assert_se(htb = TCLASS_TO_HTB(tclass)); + + struct tc_htb_opt opt = { + .prio = htb->priority, + .quantum = htb->quantum, + .rate.rate = (htb->rate >= (1ULL << 32)) ? ~0U : htb->rate, + .ceil.rate = (htb->ceil_rate >= (1ULL << 32)) ? ~0U : htb->ceil_rate, + .rate.overhead = htb->overhead, + .ceil.overhead = htb->overhead, + }; + + r = tc_transmit_time(htb->rate, htb->buffer, &opt.buffer); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate buffer size: %m"); + + r = tc_transmit_time(htb->ceil_rate, htb->ceil_buffer, &opt.cbuffer); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate ceil buffer size: %m"); + + r = tc_fill_ratespec_and_table(&opt.rate, rtab, htb->mtu); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate rate table: %m"); + + r = tc_fill_ratespec_and_table(&opt.ceil, ctab, htb->mtu); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate ceil rate table: %m"); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "htb"); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_HTB_PARMS, &opt, sizeof(opt)); + if (r < 0) + return r; + + if (htb->rate >= (1ULL << 32)) { + r = sd_netlink_message_append_u64(req, TCA_HTB_RATE64, htb->rate); + if (r < 0) + return r; + } + + if (htb->ceil_rate >= (1ULL << 32)) { + r = sd_netlink_message_append_u64(req, TCA_HTB_CEIL64, htb->ceil_rate); + if (r < 0) + return r; + } + + r = sd_netlink_message_append_data(req, TCA_HTB_RTAB, rtab, sizeof(rtab)); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_HTB_CTAB, ctab, sizeof(ctab)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_hierarchy_token_bucket_class_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + HierarchyTokenBucketClass *htb; + Network *network = ASSERT_PTR(data); + uint32_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + htb = TCLASS_TO_HTB(tclass); + + if (isempty(rvalue)) { + htb->priority = 0; + tclass = NULL; + return 0; + } + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + htb->priority = v; + tclass = NULL; + + return 0; +} + +int config_parse_hierarchy_token_bucket_class_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + HierarchyTokenBucketClass *htb; + Network *network = ASSERT_PTR(data); + uint64_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + htb = TCLASS_TO_HTB(tclass); + + if (isempty(rvalue)) { + if (streq(lvalue, "QuantumBytes")) + htb->quantum = 0; + else if (streq(lvalue, "MTUBytes")) + htb->mtu = HTB_DEFAULT_MTU; + else if (streq(lvalue, "OverheadBytes")) + htb->overhead = 0; + else if (streq(lvalue, "BufferBytes")) + htb->buffer = 0; + else if (streq(lvalue, "CeilBufferBytes")) + htb->ceil_buffer = 0; + else + assert_not_reached(); + + tclass = NULL; + return 0; + } + + r = parse_size(rvalue, 1024, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if ((streq(lvalue, "OverheadBytes") && v > UINT16_MAX) || v > UINT32_MAX) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (streq(lvalue, "QuantumBytes")) + htb->quantum = v; + else if (streq(lvalue, "OverheadBytes")) + htb->overhead = v; + else if (streq(lvalue, "MTUBytes")) + htb->mtu = v; + else if (streq(lvalue, "BufferBytes")) + htb->buffer = v; + else if (streq(lvalue, "CeilBufferBytes")) + htb->ceil_buffer = v; + else + assert_not_reached(); + + tclass = NULL; + + return 0; +} + +int config_parse_hierarchy_token_bucket_class_rate( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + HierarchyTokenBucketClass *htb; + Network *network = ASSERT_PTR(data); + uint64_t *v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + htb = TCLASS_TO_HTB(tclass); + if (streq(lvalue, "Rate")) + v = &htb->rate; + else if (streq(lvalue, "CeilRate")) + v = &htb->ceil_rate; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *v = 0; + + tclass = NULL; + return 0; + } + + r = parse_size(rvalue, 1000, v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + *v /= 8; + tclass = NULL; + + return 0; +} + +static int hierarchy_token_bucket_class_init(TClass *tclass) { + HierarchyTokenBucketClass *htb; + + assert(tclass); + + htb = TCLASS_TO_HTB(tclass); + + htb->mtu = HTB_DEFAULT_MTU; + + return 0; +} + +static int hierarchy_token_bucket_class_verify(TClass *tclass) { + HierarchyTokenBucketClass *htb; + uint32_t hz; + int r; + + assert(tclass); + + htb = TCLASS_TO_HTB(tclass); + + if (htb->rate == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Rate= is mandatory. " + "Ignoring [HierarchyTokenBucketClass] section from line %u.", + tclass->section->filename, tclass->section->line); + + /* if CeilRate= setting is missing, use the same as Rate= */ + if (htb->ceil_rate == 0) + htb->ceil_rate = htb->rate; + + r = tc_init(NULL, &hz); + if (r < 0) + return log_error_errno(r, "Failed to read /proc/net/psched: %m"); + + if (htb->buffer == 0) + htb->buffer = htb->rate / hz + htb->mtu; + if (htb->ceil_buffer == 0) + htb->ceil_buffer = htb->ceil_rate / hz + htb->mtu; + + return 0; +} + +const TClassVTable htb_tclass_vtable = { + .object_size = sizeof(HierarchyTokenBucketClass), + .tca_kind = "htb", + .fill_message = hierarchy_token_bucket_class_fill_message, + .init = hierarchy_token_bucket_class_init, + .verify = hierarchy_token_bucket_class_verify, +}; diff --git a/src/network/tc/htb.h b/src/network/tc/htb.h new file mode 100644 index 0000000..55644db --- /dev/null +++ b/src/network/tc/htb.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "tclass.h" + +typedef struct HierarchyTokenBucket { + QDisc meta; + + uint32_t default_class; + uint32_t rate_to_quantum; +} HierarchyTokenBucket; + +DEFINE_QDISC_CAST(HTB, HierarchyTokenBucket); +extern const QDiscVTable htb_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_default_class); +CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_u32); + +typedef struct HierarchyTokenBucketClass { + TClass meta; + + uint32_t priority; + uint32_t quantum; + uint32_t mtu; + uint16_t overhead; + uint64_t rate; + uint32_t buffer; + uint64_t ceil_rate; + uint32_t ceil_buffer; +} HierarchyTokenBucketClass; + +DEFINE_TCLASS_CAST(HTB, HierarchyTokenBucketClass); +extern const TClassVTable htb_tclass_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_u32); +CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_size); +CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_rate); diff --git a/src/network/tc/netem.c b/src/network/tc/netem.c new file mode 100644 index 0000000..6a63221 --- /dev/null +++ b/src/network/tc/netem.c @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netem.h" +#include "netlink-util.h" +#include "networkd-manager.h" +#include "parse-util.h" +#include "qdisc.h" +#include "strv.h" +#include "tc-util.h" + +static int network_emulator_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + NetworkEmulator *ne; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(ne = NETEM(qdisc)); + + struct tc_netem_qopt opt = { + .limit = ne->limit > 0 ? ne->limit : 1000, + .loss = ne->loss, + .duplicate = ne->duplicate, + }; + + if (ne->delay != USEC_INFINITY) { + r = tc_time_to_tick(ne->delay, &opt.latency); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate latency in TCA_OPTION: %m"); + } + + if (ne->jitter != USEC_INFINITY) { + r = tc_time_to_tick(ne->jitter, &opt.jitter); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate jitter in TCA_OPTION: %m"); + } + + r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(opt)); + if (r < 0) + return r; + + return 0; +} + +int config_parse_network_emulator_delay( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + NetworkEmulator *ne; + usec_t u; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ne = NETEM(qdisc); + + if (isempty(rvalue)) { + if (STR_IN_SET(lvalue, "DelaySec", "NetworkEmulatorDelaySec")) + ne->delay = USEC_INFINITY; + else if (STR_IN_SET(lvalue, "DelayJitterSec", "NetworkEmulatorDelayJitterSec")) + ne->jitter = USEC_INFINITY; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_sec(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (STR_IN_SET(lvalue, "DelaySec", "NetworkEmulatorDelaySec")) + ne->delay = u; + else if (STR_IN_SET(lvalue, "DelayJitterSec", "NetworkEmulatorDelayJitterSec")) + ne->jitter = u; + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_network_emulator_rate( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + NetworkEmulator *ne; + uint32_t rate; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ne = NETEM(qdisc); + + if (isempty(rvalue)) { + if (STR_IN_SET(lvalue, "LossRate", "NetworkEmulatorLossRate")) + ne->loss = 0; + else if (STR_IN_SET(lvalue, "DuplicateRate", "NetworkEmulatorDuplicateRate")) + ne->duplicate = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_tc_percent(rvalue, &rate); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (STR_IN_SET(lvalue, "LossRate", "NetworkEmulatorLossRate")) + ne->loss = rate; + else if (STR_IN_SET(lvalue, "DuplicateRate", "NetworkEmulatorDuplicateRate")) + ne->duplicate = rate; + + TAKE_PTR(qdisc); + return 0; +} + +int config_parse_network_emulator_packet_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + NetworkEmulator *ne; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + ne = NETEM(qdisc); + + if (isempty(rvalue)) { + ne->limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou(rvalue, &ne->limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + return 0; +} + +const QDiscVTable netem_vtable = { + .object_size = sizeof(NetworkEmulator), + .tca_kind = "netem", + .fill_message = network_emulator_fill_message, +}; diff --git a/src/network/tc/netem.h b/src/network/tc/netem.h new file mode 100644 index 0000000..d58d5ac --- /dev/null +++ b/src/network/tc/netem.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "time-util.h" + +typedef struct NetworkEmulator { + QDisc meta; + + usec_t delay; + usec_t jitter; + + uint32_t limit; + uint32_t loss; + uint32_t duplicate; +} NetworkEmulator; + +DEFINE_QDISC_CAST(NETEM, NetworkEmulator); +extern const QDiscVTable netem_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_delay); +CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_rate); +CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_packet_limit); diff --git a/src/network/tc/pie.c b/src/network/tc/pie.c new file mode 100644 index 0000000..c9b171b --- /dev/null +++ b/src/network/tc/pie.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "pie.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "string-util.h" + +static int pie_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + ProportionalIntegralControllerEnhanced *pie; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(pie = PIE(qdisc)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "pie"); + if (r < 0) + return r; + + if (pie->packet_limit > 0) { + r = sd_netlink_message_append_u32(req, TCA_PIE_LIMIT, pie->packet_limit); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_pie_packet_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + ProportionalIntegralControllerEnhanced *pie; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_PIE, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + pie = PIE(qdisc); + + if (isempty(rvalue)) { + pie->packet_limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &pie->packet_limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable pie_vtable = { + .object_size = sizeof(ProportionalIntegralControllerEnhanced), + .tca_kind = "pie", + .fill_message = pie_fill_message, +}; diff --git a/src/network/tc/pie.h b/src/network/tc/pie.h new file mode 100644 index 0000000..40a114e --- /dev/null +++ b/src/network/tc/pie.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct ProportionalIntegralControllerEnhanced { + QDisc meta; + + uint32_t packet_limit; +} ProportionalIntegralControllerEnhanced; + +DEFINE_QDISC_CAST(PIE, ProportionalIntegralControllerEnhanced); +extern const QDiscVTable pie_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_pie_packet_limit); diff --git a/src/network/tc/qdisc.c b/src/network/tc/qdisc.c new file mode 100644 index 0000000..f20f410 --- /dev/null +++ b/src/network/tc/qdisc.c @@ -0,0 +1,715 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "in-addr-util.h" +#include "netlink-util.h" +#include "networkd-link.h" +#include "networkd-manager.h" +#include "networkd-network.h" +#include "networkd-queue.h" +#include "parse-util.h" +#include "qdisc.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" +#include "tc-util.h" + +const QDiscVTable * const qdisc_vtable[_QDISC_KIND_MAX] = { + [QDISC_KIND_BFIFO] = &bfifo_vtable, + [QDISC_KIND_CAKE] = &cake_vtable, + [QDISC_KIND_CODEL] = &codel_vtable, + [QDISC_KIND_DRR] = &drr_vtable, + [QDISC_KIND_ETS] = &ets_vtable, + [QDISC_KIND_FQ] = &fq_vtable, + [QDISC_KIND_FQ_CODEL] = &fq_codel_vtable, + [QDISC_KIND_FQ_PIE] = &fq_pie_vtable, + [QDISC_KIND_GRED] = &gred_vtable, + [QDISC_KIND_HHF] = &hhf_vtable, + [QDISC_KIND_HTB] = &htb_vtable, + [QDISC_KIND_NETEM] = &netem_vtable, + [QDISC_KIND_PIE] = &pie_vtable, + [QDISC_KIND_QFQ] = &qfq_vtable, + [QDISC_KIND_PFIFO] = &pfifo_vtable, + [QDISC_KIND_PFIFO_FAST] = &pfifo_fast_vtable, + [QDISC_KIND_PFIFO_HEAD_DROP] = &pfifo_head_drop_vtable, + [QDISC_KIND_SFB] = &sfb_vtable, + [QDISC_KIND_SFQ] = &sfq_vtable, + [QDISC_KIND_TBF] = &tbf_vtable, + [QDISC_KIND_TEQL] = &teql_vtable, +}; + +static int qdisc_new(QDiscKind kind, QDisc **ret) { + _cleanup_(qdisc_freep) QDisc *qdisc = NULL; + int r; + + if (kind == _QDISC_KIND_INVALID) { + qdisc = new(QDisc, 1); + if (!qdisc) + return -ENOMEM; + + *qdisc = (QDisc) { + .parent = TC_H_ROOT, + .kind = kind, + }; + } else { + assert(kind >= 0 && kind < _QDISC_KIND_MAX); + qdisc = malloc0(qdisc_vtable[kind]->object_size); + if (!qdisc) + return -ENOMEM; + + qdisc->parent = TC_H_ROOT; + qdisc->kind = kind; + + if (QDISC_VTABLE(qdisc)->init) { + r = QDISC_VTABLE(qdisc)->init(qdisc); + if (r < 0) + return r; + } + } + + *ret = TAKE_PTR(qdisc); + + return 0; +} + +int qdisc_new_static(QDiscKind kind, Network *network, const char *filename, unsigned section_line, QDisc **ret) { + _cleanup_(config_section_freep) ConfigSection *n = NULL; + _cleanup_(qdisc_freep) QDisc *qdisc = NULL; + QDisc *existing; + int r; + + assert(network); + assert(ret); + assert(filename); + assert(section_line > 0); + + r = config_section_new(filename, section_line, &n); + if (r < 0) + return r; + + existing = hashmap_get(network->qdiscs_by_section, n); + if (existing) { + if (existing->kind != _QDISC_KIND_INVALID && + kind != _QDISC_KIND_INVALID && + existing->kind != kind) + return -EINVAL; + + if (existing->kind == kind || kind == _QDISC_KIND_INVALID) { + *ret = existing; + return 0; + } + } + + r = qdisc_new(kind, &qdisc); + if (r < 0) + return r; + + if (existing) { + qdisc->handle = existing->handle; + qdisc->parent = existing->parent; + qdisc->tca_kind = TAKE_PTR(existing->tca_kind); + + qdisc_free(existing); + } + + qdisc->network = network; + qdisc->section = TAKE_PTR(n); + qdisc->source = NETWORK_CONFIG_SOURCE_STATIC; + + r = hashmap_ensure_put(&network->qdiscs_by_section, &config_section_hash_ops, qdisc->section, qdisc); + if (r < 0) + return r; + + *ret = TAKE_PTR(qdisc); + return 0; +} + +QDisc* qdisc_free(QDisc *qdisc) { + if (!qdisc) + return NULL; + + if (qdisc->network && qdisc->section) + hashmap_remove(qdisc->network->qdiscs_by_section, qdisc->section); + + config_section_free(qdisc->section); + + if (qdisc->link) + set_remove(qdisc->link->qdiscs, qdisc); + + free(qdisc->tca_kind); + return mfree(qdisc); +} + +static const char *qdisc_get_tca_kind(const QDisc *qdisc) { + assert(qdisc); + + return (QDISC_VTABLE(qdisc) && QDISC_VTABLE(qdisc)->tca_kind) ? + QDISC_VTABLE(qdisc)->tca_kind : qdisc->tca_kind; +} + +static void qdisc_hash_func(const QDisc *qdisc, struct siphash *state) { + assert(qdisc); + assert(state); + + siphash24_compress(&qdisc->handle, sizeof(qdisc->handle), state); + siphash24_compress(&qdisc->parent, sizeof(qdisc->parent), state); + siphash24_compress_string(qdisc_get_tca_kind(qdisc), state); +} + +static int qdisc_compare_func(const QDisc *a, const QDisc *b) { + int r; + + assert(a); + assert(b); + + r = CMP(a->handle, b->handle); + if (r != 0) + return r; + + r = CMP(a->parent, b->parent); + if (r != 0) + return r; + + return strcmp_ptr(qdisc_get_tca_kind(a), qdisc_get_tca_kind(b)); +} + +DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR( + qdisc_hash_ops, + QDisc, + qdisc_hash_func, + qdisc_compare_func, + qdisc_free); + +static int qdisc_get(Link *link, const QDisc *in, QDisc **ret) { + QDisc *existing; + + assert(link); + assert(in); + + existing = set_get(link->qdiscs, in); + if (!existing) + return -ENOENT; + + if (ret) + *ret = existing; + return 0; +} + +static int qdisc_add(Link *link, QDisc *qdisc) { + int r; + + assert(link); + assert(qdisc); + + r = set_ensure_put(&link->qdiscs, &qdisc_hash_ops, qdisc); + if (r < 0) + return r; + if (r == 0) + return -EEXIST; + + qdisc->link = link; + return 0; +} + +static int qdisc_dup(const QDisc *src, QDisc **ret) { + _cleanup_(qdisc_freep) QDisc *dst = NULL; + + assert(src); + assert(ret); + + if (QDISC_VTABLE(src)) + dst = memdup(src, QDISC_VTABLE(src)->object_size); + else + dst = newdup(QDisc, src, 1); + if (!dst) + return -ENOMEM; + + /* clear all pointers */ + dst->network = NULL; + dst->section = NULL; + dst->link = NULL; + dst->tca_kind = NULL; + + if (src->tca_kind) { + dst->tca_kind = strdup(src->tca_kind); + if (!dst->tca_kind) + return -ENOMEM; + } + + *ret = TAKE_PTR(dst); + return 0; +} + +static void log_qdisc_debug(QDisc *qdisc, Link *link, const char *str) { + _cleanup_free_ char *state = NULL; + + assert(qdisc); + assert(str); + + if (!DEBUG_LOGGING) + return; + + (void) network_config_state_to_string_alloc(qdisc->state, &state); + + log_link_debug(link, "%s %s QDisc (%s): handle=%"PRIx32":%"PRIx32", parent=%"PRIx32":%"PRIx32", kind=%s", + str, strna(network_config_source_to_string(qdisc->source)), strna(state), + TC_H_MAJ(qdisc->handle) >> 16, TC_H_MIN(qdisc->handle), + TC_H_MAJ(qdisc->parent) >> 16, TC_H_MIN(qdisc->parent), + strna(qdisc_get_tca_kind(qdisc))); +} + +int link_find_qdisc(Link *link, uint32_t handle, const char *kind, QDisc **ret) { + QDisc *qdisc; + + assert(link); + + SET_FOREACH(qdisc, link->qdiscs) { + if (qdisc->handle != handle) + continue; + + if (!qdisc_exists(qdisc)) + continue; + + if (kind && !streq_ptr(kind, qdisc_get_tca_kind(qdisc))) + continue; + + if (ret) + *ret = qdisc; + return 0; + } + + return -ENOENT; +} + +QDisc* qdisc_drop(QDisc *qdisc) { + TClass *tclass; + Link *link; + + assert(qdisc); + + link = ASSERT_PTR(qdisc->link); + + /* also drop all child classes assigned to the qdisc. */ + SET_FOREACH(tclass, link->tclasses) { + if (TC_H_MAJ(tclass->classid) != qdisc->handle) + continue; + + tclass_drop(tclass); + } + + qdisc_enter_removed(qdisc); + + if (qdisc->state == 0) { + log_qdisc_debug(qdisc, link, "Forgetting"); + qdisc = qdisc_free(qdisc); + } else + log_qdisc_debug(qdisc, link, "Removed"); + + return qdisc; +} + +static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, Link *link, QDisc *qdisc) { + int r; + + assert(m); + assert(link); + + r = sd_netlink_message_get_errno(m); + if (r < 0 && r != -EEXIST) { + log_link_message_error_errno(link, m, r, "Could not set QDisc"); + link_enter_failed(link); + return 1; + } + + if (link->tc_messages == 0) { + log_link_debug(link, "Traffic control configured"); + link->tc_configured = true; + link_check_ready(link); + } + + return 1; +} + +static int qdisc_configure(QDisc *qdisc, Link *link, Request *req) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + assert(qdisc); + assert(link); + assert(link->manager); + assert(link->manager->rtnl); + assert(link->ifindex > 0); + assert(req); + + log_qdisc_debug(qdisc, link, "Configuring"); + + r = sd_rtnl_message_new_traffic_control(link->manager->rtnl, &m, RTM_NEWQDISC, + link->ifindex, qdisc->handle, qdisc->parent); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, TCA_KIND, qdisc_get_tca_kind(qdisc)); + if (r < 0) + return r; + + if (QDISC_VTABLE(qdisc) && QDISC_VTABLE(qdisc)->fill_message) { + r = QDISC_VTABLE(qdisc)->fill_message(link, qdisc, m); + if (r < 0) + return r; + } + + return request_call_netlink_async(link->manager->rtnl, m, req); +} + +static bool qdisc_is_ready_to_configure(QDisc *qdisc, Link *link) { + assert(qdisc); + assert(link); + + if (!IN_SET(link->state, LINK_STATE_CONFIGURING, LINK_STATE_CONFIGURED)) + return false; + + /* TC_H_CLSACT == TC_H_INGRESS */ + if (!IN_SET(qdisc->parent, TC_H_ROOT, TC_H_CLSACT)) { + if (TC_H_MIN(qdisc->parent) == 0) { + if (link_find_qdisc(link, qdisc->parent, NULL, NULL) < 0) + return false; + } else { + if (link_find_tclass(link, qdisc->parent, NULL) < 0) + return false; + } + } + + if (QDISC_VTABLE(qdisc) && + QDISC_VTABLE(qdisc)->is_ready && + QDISC_VTABLE(qdisc)->is_ready(qdisc, link) <= 0) + return false; + + return true; +} + +static int qdisc_process_request(Request *req, Link *link, QDisc *qdisc) { + int r; + + assert(req); + assert(link); + assert(qdisc); + + if (!qdisc_is_ready_to_configure(qdisc, link)) + return 0; + + r = qdisc_configure(qdisc, link, req); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to configure QDisc: %m"); + + qdisc_enter_configuring(qdisc); + return 1; +} + +int link_request_qdisc(Link *link, QDisc *qdisc) { + QDisc *existing; + int r; + + assert(link); + assert(qdisc); + + if (qdisc_get(link, qdisc, &existing) < 0) { + _cleanup_(qdisc_freep) QDisc *tmp = NULL; + + r = qdisc_dup(qdisc, &tmp); + if (r < 0) + return log_oom(); + + r = qdisc_add(link, tmp); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to store QDisc: %m"); + + existing = TAKE_PTR(tmp); + } else + existing->source = qdisc->source; + + log_qdisc_debug(existing, link, "Requesting"); + r = link_queue_request_safe(link, REQUEST_TYPE_TC_QDISC, + existing, NULL, + qdisc_hash_func, + qdisc_compare_func, + qdisc_process_request, + &link->tc_messages, + qdisc_handler, + NULL); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to request QDisc: %m"); + if (r == 0) + return 0; + + qdisc_enter_requesting(existing); + return 1; +} + +int manager_rtnl_process_qdisc(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) { + _cleanup_(qdisc_freep) QDisc *tmp = NULL; + QDisc *qdisc = NULL; + Link *link; + uint16_t type; + int ifindex, r; + + assert(rtnl); + assert(message); + assert(m); + + if (sd_netlink_message_is_error(message)) { + r = sd_netlink_message_get_errno(message); + if (r < 0) + log_message_warning_errno(message, r, "rtnl: failed to receive QDisc message, ignoring"); + + return 0; + } + + r = sd_netlink_message_get_type(message, &type); + if (r < 0) { + log_warning_errno(r, "rtnl: could not get message type, ignoring: %m"); + return 0; + } else if (!IN_SET(type, RTM_NEWQDISC, RTM_DELQDISC)) { + log_warning("rtnl: received unexpected message type %u when processing QDisc, ignoring.", type); + return 0; + } + + r = sd_rtnl_message_traffic_control_get_ifindex(message, &ifindex); + if (r < 0) { + log_warning_errno(r, "rtnl: could not get ifindex from message, ignoring: %m"); + return 0; + } else if (ifindex <= 0) { + log_warning("rtnl: received QDisc message with invalid ifindex %d, ignoring.", ifindex); + return 0; + } + + if (link_get_by_index(m, ifindex, &link) < 0) { + if (!m->enumerating) + log_warning("rtnl: received QDisc for link '%d' we don't know about, ignoring.", ifindex); + return 0; + } + + r = qdisc_new(_QDISC_KIND_INVALID, &tmp); + if (r < 0) + return log_oom(); + + r = sd_rtnl_message_traffic_control_get_handle(message, &tmp->handle); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received QDisc message without handle, ignoring: %m"); + return 0; + } + + r = sd_rtnl_message_traffic_control_get_parent(message, &tmp->parent); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received QDisc message without parent, ignoring: %m"); + return 0; + } + + r = sd_netlink_message_read_string_strdup(message, TCA_KIND, &tmp->tca_kind); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received QDisc message without kind, ignoring: %m"); + return 0; + } + + (void) qdisc_get(link, tmp, &qdisc); + + switch (type) { + case RTM_NEWQDISC: + if (qdisc) { + qdisc_enter_configured(qdisc); + log_qdisc_debug(qdisc, link, "Received remembered"); + } else { + qdisc_enter_configured(tmp); + log_qdisc_debug(tmp, link, "Received new"); + + r = qdisc_add(link, tmp); + if (r < 0) { + log_link_warning_errno(link, r, "Failed to remember QDisc, ignoring: %m"); + return 0; + } + + qdisc = TAKE_PTR(tmp); + } + + if (!m->enumerating) { + /* Some kind of QDisc (e.g. tbf) also create an implicit class under the qdisc, but + * the kernel may not notify about the class. Hence, we need to enumerate classes. */ + r = link_enumerate_tclass(link, qdisc->handle); + if (r < 0) + log_link_warning_errno(link, r, "Failed to enumerate TClass, ignoring: %m"); + } + + break; + + case RTM_DELQDISC: + if (qdisc) + qdisc_drop(qdisc); + else + log_qdisc_debug(tmp, link, "Kernel removed unknown"); + + break; + + default: + assert_not_reached(); + } + + return 1; +} + +static int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) { + int r; + + assert(qdisc); + assert(has_root); + assert(has_clsact); + + if (section_is_invalid(qdisc->section)) + return -EINVAL; + + if (QDISC_VTABLE(qdisc) && QDISC_VTABLE(qdisc)->verify) { + r = QDISC_VTABLE(qdisc)->verify(qdisc); + if (r < 0) + return r; + } + + if (qdisc->parent == TC_H_ROOT) { + if (*has_root) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: More than one root qdisc section is defined. " + "Ignoring the qdisc section from line %u.", + qdisc->section->filename, qdisc->section->line); + *has_root = true; + } else if (qdisc->parent == TC_H_CLSACT) { /* TC_H_CLSACT == TC_H_INGRESS */ + if (*has_clsact) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: More than one clsact or ingress qdisc section is defined. " + "Ignoring the qdisc section from line %u.", + qdisc->section->filename, qdisc->section->line); + *has_clsact = true; + } + + return 0; +} + +void network_drop_invalid_qdisc(Network *network) { + bool has_root = false, has_clsact = false; + QDisc *qdisc; + + assert(network); + + HASHMAP_FOREACH(qdisc, network->qdiscs_by_section) + if (qdisc_section_verify(qdisc, &has_root, &has_clsact) < 0) + qdisc_free(qdisc); +} + +int config_parse_qdisc_parent( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(ltype, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + if (streq(rvalue, "root")) + qdisc->parent = TC_H_ROOT; + else if (streq(rvalue, "clsact")) { + qdisc->parent = TC_H_CLSACT; + qdisc->handle = TC_H_MAKE(TC_H_CLSACT, 0); + } else if (streq(rvalue, "ingress")) { + qdisc->parent = TC_H_INGRESS; + qdisc->handle = TC_H_MAKE(TC_H_INGRESS, 0); + } else { + r = parse_handle(rvalue, &qdisc->parent); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse 'Parent=', ignoring assignment: %s", + rvalue); + return 0; + } + } + + if (STR_IN_SET(rvalue, "clsact", "ingress")) { + r = free_and_strdup(&qdisc->tca_kind, rvalue); + if (r < 0) + return log_oom(); + } else + qdisc->tca_kind = mfree(qdisc->tca_kind); + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_qdisc_handle( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + uint16_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(ltype, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + if (isempty(rvalue)) { + qdisc->handle = TC_H_UNSPEC; + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou16_full(rvalue, 16, &n); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse 'Handle=', ignoring assignment: %s", + rvalue); + return 0; + } + + qdisc->handle = (uint32_t) n << 16; + TAKE_PTR(qdisc); + + return 0; +} diff --git a/src/network/tc/qdisc.h b/src/network/tc/qdisc.h new file mode 100644 index 0000000..a62b941 --- /dev/null +++ b/src/network/tc/qdisc.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "networkd-util.h" + +typedef struct Link Link; +typedef struct Manager Manager; +typedef struct Network Network; + +typedef enum QDiscKind { + QDISC_KIND_BFIFO, + QDISC_KIND_CAKE, + QDISC_KIND_CODEL, + QDISC_KIND_DRR, + QDISC_KIND_ETS, + QDISC_KIND_FQ, + QDISC_KIND_FQ_CODEL, + QDISC_KIND_FQ_PIE, + QDISC_KIND_GRED, + QDISC_KIND_HHF, + QDISC_KIND_HTB, + QDISC_KIND_NETEM, + QDISC_KIND_PFIFO, + QDISC_KIND_PFIFO_FAST, + QDISC_KIND_PFIFO_HEAD_DROP, + QDISC_KIND_PIE, + QDISC_KIND_QFQ, + QDISC_KIND_SFB, + QDISC_KIND_SFQ, + QDISC_KIND_TBF, + QDISC_KIND_TEQL, + _QDISC_KIND_MAX, + _QDISC_KIND_INVALID = -EINVAL, +} QDiscKind; + +typedef struct QDisc { + Link *link; + Network *network; + ConfigSection *section; + NetworkConfigSource source; + NetworkConfigState state; + + uint32_t handle; + uint32_t parent; + + char *tca_kind; + QDiscKind kind; +} QDisc; + +typedef struct QDiscVTable { + size_t object_size; + const char *tca_kind; + /* called in qdisc_new() */ + int (*init)(QDisc *qdisc); + int (*fill_message)(Link *link, QDisc *qdisc, sd_netlink_message *m); + int (*verify)(QDisc *qdisc); + int (*is_ready)(QDisc *qdisc, Link *link); +} QDiscVTable; + +extern const QDiscVTable * const qdisc_vtable[_QDISC_KIND_MAX]; + +#define QDISC_VTABLE(q) ((q)->kind != _QDISC_KIND_INVALID ? qdisc_vtable[(q)->kind] : NULL) + +/* For casting a qdisc into the various qdisc kinds */ +#define DEFINE_QDISC_CAST(UPPERCASE, MixedCase) \ + static inline MixedCase* UPPERCASE(QDisc *q) { \ + if (_unlikely_(!q || q->kind != QDISC_KIND_##UPPERCASE)) \ + return NULL; \ + \ + return (MixedCase*) q; \ + } + +DEFINE_NETWORK_CONFIG_STATE_FUNCTIONS(QDisc, qdisc); + +QDisc* qdisc_free(QDisc *qdisc); +int qdisc_new_static(QDiscKind kind, Network *network, const char *filename, unsigned section_line, QDisc **ret); + +QDisc* qdisc_drop(QDisc *qdisc); + +int link_find_qdisc(Link *link, uint32_t handle, const char *kind, QDisc **qdisc); + +int link_request_qdisc(Link *link, QDisc *qdisc); + +void network_drop_invalid_qdisc(Network *network); + +int manager_rtnl_process_qdisc(sd_netlink *rtnl, sd_netlink_message *message, Manager *m); + +DEFINE_SECTION_CLEANUP_FUNCTIONS(QDisc, qdisc_free); + +CONFIG_PARSER_PROTOTYPE(config_parse_qdisc_parent); +CONFIG_PARSER_PROTOTYPE(config_parse_qdisc_handle); + +#include "cake.h" +#include "codel.h" +#include "ets.h" +#include "fifo.h" +#include "fq-codel.h" +#include "fq-pie.h" +#include "fq.h" +#include "gred.h" +#include "hhf.h" +#include "htb.h" +#include "pie.h" +#include "qfq.h" +#include "netem.h" +#include "drr.h" +#include "sfb.h" +#include "sfq.h" +#include "tbf.h" +#include "teql.h" diff --git a/src/network/tc/qfq.c b/src/network/tc/qfq.c new file mode 100644 index 0000000..7702e6f --- /dev/null +++ b/src/network/tc/qfq.c @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "parse-util.h" +#include "qdisc.h" +#include "qfq.h" +#include "string-util.h" + +#define QFQ_MAX_WEIGHT (1 << 10) +#define QFQ_MIN_MAX_PACKET 512 +#define QFQ_MAX_MAX_PACKET (1 << 16) + +const QDiscVTable qfq_vtable = { + .object_size = sizeof(QuickFairQueueing), + .tca_kind = "qfq", +}; + +static int quick_fair_queueing_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) { + QuickFairQueueingClass *qfq; + int r; + + assert(link); + assert(tclass); + assert(req); + + assert_se(qfq = TCLASS_TO_QFQ(tclass)); + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "qfq"); + if (r < 0) + return r; + + if (qfq->weight > 0) { + r = sd_netlink_message_append_u32(req, TCA_QFQ_WEIGHT, qfq->weight); + if (r < 0) + return r; + } + + if (qfq->max_packet > 0) { + r = sd_netlink_message_append_u32(req, TCA_QFQ_LMAX, qfq->max_packet); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_quick_fair_queueing_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + QuickFairQueueingClass *qfq; + Network *network = ASSERT_PTR(data); + uint32_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_QFQ, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + qfq = TCLASS_TO_QFQ(tclass); + + if (isempty(rvalue)) { + qfq->weight = 0; + TAKE_PTR(tclass); + return 0; + } + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (v == 0 || v > QFQ_MAX_WEIGHT) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + qfq->weight = v; + TAKE_PTR(tclass); + + return 0; +} + +int config_parse_quick_fair_queueing_max_packet( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + QuickFairQueueingClass *qfq; + Network *network = ASSERT_PTR(data); + uint64_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(TCLASS_KIND_QFQ, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + qfq = TCLASS_TO_QFQ(tclass); + + if (isempty(rvalue)) { + qfq->max_packet = 0; + TAKE_PTR(tclass); + return 0; + } + + r = parse_size(rvalue, 1024, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (v < QFQ_MIN_MAX_PACKET || v > QFQ_MAX_MAX_PACKET) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + qfq->max_packet = (uint32_t) v; + TAKE_PTR(tclass); + + return 0; +} + +const TClassVTable qfq_tclass_vtable = { + .object_size = sizeof(QuickFairQueueingClass), + .tca_kind = "qfq", + .fill_message = quick_fair_queueing_class_fill_message, +}; diff --git a/src/network/tc/qfq.h b/src/network/tc/qfq.h new file mode 100644 index 0000000..0f013a9 --- /dev/null +++ b/src/network/tc/qfq.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct QuickFairQueueing { + QDisc meta; +} QuickFairQueueing; + +DEFINE_QDISC_CAST(QFQ, QuickFairQueueing); +extern const QDiscVTable qfq_vtable; + +typedef struct QuickFairQueueingClass { + TClass meta; + + uint32_t weight; + uint32_t max_packet; +} QuickFairQueueingClass; + +DEFINE_TCLASS_CAST(QFQ, QuickFairQueueingClass); +extern const TClassVTable qfq_tclass_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_quick_fair_queueing_weight); +CONFIG_PARSER_PROTOTYPE(config_parse_quick_fair_queueing_max_packet); diff --git a/src/network/tc/sfb.c b/src/network/tc/sfb.c new file mode 100644 index 0000000..861c5fe --- /dev/null +++ b/src/network/tc/sfb.c @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "sfb.h" +#include "string-util.h" + +static int stochastic_fair_blue_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + StochasticFairBlue *sfb; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(sfb = SFB(qdisc)); + + const struct tc_sfb_qopt opt = { + .rehash_interval = 600*1000, + .warmup_time = 60*1000, + .penalty_rate = 10, + .penalty_burst = 20, + .increment = (SFB_MAX_PROB + 1000) / 2000, + .decrement = (SFB_MAX_PROB + 10000) / 20000, + .max = 25, + .bin_size = 20, + .limit = sfb->packet_limit, + }; + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "sfb"); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_SFB_PARMS, &opt, sizeof(opt)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_stochastic_fair_blue_u32( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + StochasticFairBlue *sfb; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_SFB, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + sfb = SFB(qdisc); + + if (isempty(rvalue)) { + sfb->packet_limit = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou32(rvalue, &sfb->packet_limit); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable sfb_vtable = { + .object_size = sizeof(StochasticFairBlue), + .tca_kind = "sfb", + .fill_message = stochastic_fair_blue_fill_message, +}; diff --git a/src/network/tc/sfb.h b/src/network/tc/sfb.h new file mode 100644 index 0000000..628df35 --- /dev/null +++ b/src/network/tc/sfb.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2020 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct StochasticFairBlue { + QDisc meta; + + uint32_t packet_limit; +} StochasticFairBlue; + +DEFINE_QDISC_CAST(SFB, StochasticFairBlue); +extern const QDiscVTable sfb_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_stochastic_fair_blue_u32); diff --git a/src/network/tc/sfq.c b/src/network/tc/sfq.c new file mode 100644 index 0000000..92dbae1 --- /dev/null +++ b/src/network/tc/sfq.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "qdisc.h" +#include "sfq.h" +#include "string-util.h" + +static int stochastic_fairness_queueing_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + StochasticFairnessQueueing *sfq; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(sfq = SFQ(qdisc)); + + const struct tc_sfq_qopt_v1 opt = { + .v0.perturb_period = sfq->perturb_period / USEC_PER_SEC, + }; + + r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(opt)); + if (r < 0) + return r; + + return 0; +} + +int config_parse_stochastic_fairness_queueing_perturb_period( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + StochasticFairnessQueueing *sfq; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_SFQ, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + sfq = SFQ(qdisc); + + if (isempty(rvalue)) { + sfq->perturb_period = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_sec(rvalue, &sfq->perturb_period); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + TAKE_PTR(qdisc); + + return 0; +} + +const QDiscVTable sfq_vtable = { + .object_size = sizeof(StochasticFairnessQueueing), + .tca_kind = "sfq", + .fill_message = stochastic_fairness_queueing_fill_message, +}; diff --git a/src/network/tc/sfq.h b/src/network/tc/sfq.h new file mode 100644 index 0000000..1626775 --- /dev/null +++ b/src/network/tc/sfq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "time-util.h" + +typedef struct StochasticFairnessQueueing { + QDisc meta; + + usec_t perturb_period; +} StochasticFairnessQueueing; + +DEFINE_QDISC_CAST(SFQ, StochasticFairnessQueueing); +extern const QDiscVTable sfq_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_stochastic_fairness_queueing_perturb_period); diff --git a/src/network/tc/tbf.c b/src/network/tc/tbf.c new file mode 100644 index 0000000..647fc8c --- /dev/null +++ b/src/network/tc/tbf.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> +#include <math.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "netem.h" +#include "netlink-util.h" +#include "networkd-manager.h" +#include "parse-util.h" +#include "qdisc.h" +#include "string-util.h" +#include "strv.h" +#include "tc-util.h" + +static int token_bucket_filter_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { + uint32_t rtab[256], ptab[256]; + TokenBucketFilter *tbf; + int r; + + assert(link); + assert(qdisc); + assert(req); + + assert_se(tbf = TBF(qdisc)); + + struct tc_tbf_qopt opt = { + .rate.rate = tbf->rate >= (1ULL << 32) ? ~0U : tbf->rate, + .peakrate.rate = tbf->peak_rate >= (1ULL << 32) ? ~0U : tbf->peak_rate, + .rate.mpu = tbf->mpu, + }; + + if (tbf->limit > 0) + opt.limit = tbf->limit; + else { + double lim, lim2; + + lim = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst; + if (tbf->peak_rate > 0) { + lim2 = tbf->peak_rate * (double) tbf->latency / USEC_PER_SEC + tbf->mtu; + lim = MIN(lim, lim2); + } + opt.limit = lim; + } + + r = tc_fill_ratespec_and_table(&opt.rate, rtab, tbf->mtu); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate ratespec: %m"); + + r = tc_transmit_time(opt.rate.rate, tbf->burst, &opt.buffer); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate buffer size: %m"); + + if (opt.peakrate.rate > 0) { + opt.peakrate.mpu = tbf->mpu; + + r = tc_fill_ratespec_and_table(&opt.peakrate, ptab, tbf->mtu); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate ratespec: %m"); + + r = tc_transmit_time(opt.peakrate.rate, tbf->mtu, &opt.mtu); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to calculate mtu size: %m"); + } + + r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "tbf"); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_TBF_PARMS, &opt, sizeof(opt)); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_TBF_BURST, &tbf->burst, sizeof(tbf->burst)); + if (r < 0) + return r; + + if (tbf->rate >= (1ULL << 32)) { + r = sd_netlink_message_append_u64(req, TCA_TBF_RATE64, tbf->rate); + if (r < 0) + return r; + } + + r = sd_netlink_message_append_data(req, TCA_TBF_RTAB, rtab, sizeof(rtab)); + if (r < 0) + return r; + + if (opt.peakrate.rate > 0) { + if (tbf->peak_rate >= (1ULL << 32)) { + r = sd_netlink_message_append_u64(req, TCA_TBF_PRATE64, tbf->peak_rate); + if (r < 0) + return r; + } + + r = sd_netlink_message_append_u32(req, TCA_TBF_PBURST, tbf->mtu); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(req, TCA_TBF_PTAB, ptab, sizeof(ptab)); + if (r < 0) + return r; + } + + r = sd_netlink_message_close_container(req); + if (r < 0) + return r; + + return 0; +} + +int config_parse_token_bucket_filter_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + TokenBucketFilter *tbf; + uint64_t k; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + tbf = TBF(qdisc); + + if (isempty(rvalue)) { + if (STR_IN_SET(lvalue, "BurstBytes", "Burst")) + tbf->burst = 0; + else if (STR_IN_SET(lvalue, "LimitBytes", "LimitSize")) + tbf->limit = 0; + else if (streq(lvalue, "MTUBytes")) + tbf->mtu = 0; + else if (streq(lvalue, "MPUBytes")) + tbf->mpu = 0; + else + assert_not_reached(); + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_size(rvalue, 1024, &k); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + if (STR_IN_SET(lvalue, "BurstBytes", "Burst")) + tbf->burst = k; + else if (STR_IN_SET(lvalue, "LimitBytes", "LimitSize")) + tbf->limit = k; + else if (streq(lvalue, "MPUBytes")) + tbf->mpu = k; + else if (streq(lvalue, "MTUBytes")) + tbf->mtu = k; + else + assert_not_reached(); + + TAKE_PTR(qdisc); + + return 0; +} + +int config_parse_token_bucket_filter_rate( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + TokenBucketFilter *tbf; + uint64_t k, *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + tbf = TBF(qdisc); + if (streq(lvalue, "Rate")) + p = &tbf->rate; + else if (streq(lvalue, "PeakRate")) + p = &tbf->peak_rate; + else + assert_not_reached(); + + if (isempty(rvalue)) { + *p = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = parse_size(rvalue, 1000, &k); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + *p = k / 8; + + qdisc = NULL; + + return 0; +} + +int config_parse_token_bucket_filter_latency( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + Network *network = ASSERT_PTR(data); + TokenBucketFilter *tbf; + usec_t u; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + tbf = TBF(qdisc); + + if (isempty(rvalue)) { + tbf->latency = 0; + + qdisc = NULL; + return 0; + } + + r = parse_sec(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + + tbf->latency = u; + + qdisc = NULL; + + return 0; +} + +static int token_bucket_filter_verify(QDisc *qdisc) { + TokenBucketFilter *tbf = TBF(qdisc); + + if (tbf->limit > 0 && tbf->latency > 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Specifying both LimitBytes= and LatencySec= is not allowed. " + "Ignoring [TokenBucketFilter] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + if (tbf->limit == 0 && tbf->latency == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Either LimitBytes= or LatencySec= is required. " + "Ignoring [TokenBucketFilter] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + if (tbf->rate == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Rate= is mandatory. " + "Ignoring [TokenBucketFilter] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + if (tbf->burst == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: BurstBytes= is mandatory. " + "Ignoring [TokenBucketFilter] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + if (tbf->peak_rate > 0 && tbf->mtu == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: MTUBytes= is mandatory when PeakRate= is specified. " + "Ignoring [TokenBucketFilter] section from line %u.", + qdisc->section->filename, qdisc->section->line); + + return 0; +} + +const QDiscVTable tbf_vtable = { + .object_size = sizeof(TokenBucketFilter), + .tca_kind = "tbf", + .fill_message = token_bucket_filter_fill_message, + .verify = token_bucket_filter_verify +}; diff --git a/src/network/tc/tbf.h b/src/network/tc/tbf.h new file mode 100644 index 0000000..6b4b017 --- /dev/null +++ b/src/network/tc/tbf.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" +#include "time-util.h" + +typedef struct TokenBucketFilter { + QDisc meta; + + uint64_t rate; + uint64_t peak_rate; + uint32_t burst; + uint32_t mtu; + usec_t latency; + size_t limit; + size_t mpu; +} TokenBucketFilter; + +DEFINE_QDISC_CAST(TBF, TokenBucketFilter); +extern const QDiscVTable tbf_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_latency); +CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_size); +CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_rate); diff --git a/src/network/tc/tc-util.c b/src/network/tc/tc-util.c new file mode 100644 index 0000000..3781182 --- /dev/null +++ b/src/network/tc/tc-util.c @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include "alloc-util.h" +#include "extract-word.h" +#include "fileio.h" +#include "parse-util.h" +#include "percent-util.h" +#include "tc-util.h" +#include "time-util.h" + +int tc_init(double *ret_ticks_in_usec, uint32_t *ret_hz) { + static double ticks_in_usec = -1; + static uint32_t hz; + + if (ticks_in_usec < 0) { + uint32_t clock_resolution, ticks_to_usec, usec_to_ticks; + _cleanup_free_ char *line = NULL; + double clock_factor; + int r; + + r = read_one_line_file("/proc/net/psched", &line); + if (r < 0) + return r; + + r = sscanf(line, "%08x%08x%08x%08x", &ticks_to_usec, &usec_to_ticks, &clock_resolution, &hz); + if (r < 4) + return -EIO; + + clock_factor = (double) clock_resolution / USEC_PER_SEC; + ticks_in_usec = (double) ticks_to_usec / usec_to_ticks * clock_factor; + } + + if (ret_ticks_in_usec) + *ret_ticks_in_usec = ticks_in_usec; + if (ret_hz) + *ret_hz = hz; + + return 0; +} + +int tc_time_to_tick(usec_t t, uint32_t *ret) { + double ticks_in_usec; + usec_t a; + int r; + + assert(ret); + + r = tc_init(&ticks_in_usec, NULL); + if (r < 0) + return r; + + a = t * ticks_in_usec; + if (a > UINT32_MAX) + return -ERANGE; + + *ret = a; + return 0; +} + +int parse_tc_percent(const char *s, uint32_t *ret_fraction) { + int r; + + assert(s); + assert(ret_fraction); + + r = parse_permyriad(s); + if (r < 0) + return r; + + *ret_fraction = (double) r / 10000 * UINT32_MAX; + return 0; +} + +int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret) { + return tc_time_to_tick(USEC_PER_SEC * ((double)size / (double)rate), ret); +} + +int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu) { + uint32_t cell_log = 0; + int r; + + if (mtu == 0) + mtu = 2047; + + while ((mtu >> cell_log) > 255) + cell_log++; + + for (size_t i = 0; i < 256; i++) { + uint32_t sz; + + sz = (i + 1) << cell_log; + if (sz < rate->mpu) + sz = rate->mpu; + r = tc_transmit_time(rate->rate, sz, &rtab[i]); + if (r < 0) + return r; + } + + rate->cell_align = -1; + rate->cell_log = cell_log; + rate->linklayer = TC_LINKLAYER_ETHERNET; + return 0; +} + +int parse_handle(const char *t, uint32_t *ret) { + _cleanup_free_ char *word = NULL; + uint16_t major, minor; + int r; + + assert(t); + assert(ret); + + /* Extract the major number. */ + r = extract_first_word(&t, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + if (!t) + return -EINVAL; + + r = safe_atou16_full(word, 16, &major); + if (r < 0) + return r; + + r = safe_atou16_full(t, 16, &minor); + if (r < 0) + return r; + + *ret = ((uint32_t) major << 16) | minor; + return 0; +} diff --git a/src/network/tc/tc-util.h b/src/network/tc/tc-util.h new file mode 100644 index 0000000..83bad8e --- /dev/null +++ b/src/network/tc/tc-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include <linux/pkt_sched.h> + +#include "time-util.h" + +int tc_init(double *ret_ticks_in_usec, uint32_t *ret_hz); +int tc_time_to_tick(usec_t t, uint32_t *ret); +int parse_tc_percent(const char *s, uint32_t *percent); +int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret); +int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu); +int parse_handle(const char *t, uint32_t *ret); diff --git a/src/network/tc/tc.c b/src/network/tc/tc.c new file mode 100644 index 0000000..8a1c5b3 --- /dev/null +++ b/src/network/tc/tc.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "macro.h" +#include "networkd-link.h" +#include "networkd-network.h" +#include "qdisc.h" +#include "tc.h" +#include "tclass.h" + +int link_request_traffic_control(Link *link) { + TClass *tclass; + QDisc *qdisc; + int r; + + assert(link); + assert(link->network); + + link->tc_configured = false; + + HASHMAP_FOREACH(qdisc, link->network->qdiscs_by_section) { + r = link_request_qdisc(link, qdisc); + if (r < 0) + return r; + } + + HASHMAP_FOREACH(tclass, link->network->tclasses_by_section) { + r = link_request_tclass(link, tclass); + if (r < 0) + return r; + } + + if (link->tc_messages == 0) { + link->tc_configured = true; + link_check_ready(link); + } else { + log_link_debug(link, "Setting traffic control"); + link_set_state(link, LINK_STATE_CONFIGURING); + } + + return 0; +} diff --git a/src/network/tc/tc.h b/src/network/tc/tc.h new file mode 100644 index 0000000..6226578 --- /dev/null +++ b/src/network/tc/tc.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +typedef struct Link Link; + +int link_request_traffic_control(Link *link); diff --git a/src/network/tc/tclass.c b/src/network/tc/tclass.c new file mode 100644 index 0000000..0a5fec0 --- /dev/null +++ b/src/network/tc/tclass.c @@ -0,0 +1,639 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ + +#include <linux/pkt_sched.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "in-addr-util.h" +#include "netlink-util.h" +#include "networkd-link.h" +#include "networkd-manager.h" +#include "networkd-network.h" +#include "networkd-queue.h" +#include "parse-util.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" +#include "tc-util.h" +#include "tclass.h" + +const TClassVTable * const tclass_vtable[_TCLASS_KIND_MAX] = { + [TCLASS_KIND_DRR] = &drr_tclass_vtable, + [TCLASS_KIND_HTB] = &htb_tclass_vtable, + [TCLASS_KIND_QFQ] = &qfq_tclass_vtable, +}; + +static int tclass_new(TClassKind kind, TClass **ret) { + _cleanup_(tclass_freep) TClass *tclass = NULL; + int r; + + if (kind == _TCLASS_KIND_INVALID) { + tclass = new(TClass, 1); + if (!tclass) + return -ENOMEM; + + *tclass = (TClass) { + .parent = TC_H_ROOT, + .kind = kind, + }; + } else { + assert(kind >= 0 && kind < _TCLASS_KIND_MAX); + tclass = malloc0(tclass_vtable[kind]->object_size); + if (!tclass) + return -ENOMEM; + + tclass->parent = TC_H_ROOT; + tclass->kind = kind; + + if (TCLASS_VTABLE(tclass)->init) { + r = TCLASS_VTABLE(tclass)->init(tclass); + if (r < 0) + return r; + } + } + + *ret = TAKE_PTR(tclass); + + return 0; +} + +int tclass_new_static(TClassKind kind, Network *network, const char *filename, unsigned section_line, TClass **ret) { + _cleanup_(config_section_freep) ConfigSection *n = NULL; + _cleanup_(tclass_freep) TClass *tclass = NULL; + TClass *existing; + int r; + + assert(network); + assert(ret); + assert(filename); + assert(section_line > 0); + + r = config_section_new(filename, section_line, &n); + if (r < 0) + return r; + + existing = hashmap_get(network->tclasses_by_section, n); + if (existing) { + if (existing->kind != kind) + return -EINVAL; + + *ret = existing; + return 0; + } + + r = tclass_new(kind, &tclass); + if (r < 0) + return r; + + tclass->network = network; + tclass->section = TAKE_PTR(n); + tclass->source = NETWORK_CONFIG_SOURCE_STATIC; + + r = hashmap_ensure_put(&network->tclasses_by_section, &config_section_hash_ops, tclass->section, tclass); + if (r < 0) + return r; + + *ret = TAKE_PTR(tclass); + return 0; +} + +TClass* tclass_free(TClass *tclass) { + if (!tclass) + return NULL; + + if (tclass->network && tclass->section) + hashmap_remove(tclass->network->tclasses_by_section, tclass->section); + + config_section_free(tclass->section); + + if (tclass->link) + set_remove(tclass->link->tclasses, tclass); + + free(tclass->tca_kind); + return mfree(tclass); +} + +static const char *tclass_get_tca_kind(const TClass *tclass) { + assert(tclass); + + return (TCLASS_VTABLE(tclass) && TCLASS_VTABLE(tclass)->tca_kind) ? + TCLASS_VTABLE(tclass)->tca_kind : tclass->tca_kind; +} + +static void tclass_hash_func(const TClass *tclass, struct siphash *state) { + assert(tclass); + assert(state); + + siphash24_compress(&tclass->classid, sizeof(tclass->classid), state); + siphash24_compress(&tclass->parent, sizeof(tclass->parent), state); + siphash24_compress_string(tclass_get_tca_kind(tclass), state); +} + +static int tclass_compare_func(const TClass *a, const TClass *b) { + int r; + + assert(a); + assert(b); + + r = CMP(a->classid, b->classid); + if (r != 0) + return r; + + r = CMP(a->parent, b->parent); + if (r != 0) + return r; + + return strcmp_ptr(tclass_get_tca_kind(a), tclass_get_tca_kind(b)); +} + +DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR( + tclass_hash_ops, + TClass, + tclass_hash_func, + tclass_compare_func, + tclass_free); + +static int tclass_get(Link *link, const TClass *in, TClass **ret) { + TClass *existing; + + assert(link); + assert(in); + + existing = set_get(link->tclasses, in); + if (!existing) + return -ENOENT; + + if (ret) + *ret = existing; + return 0; +} + +static int tclass_add(Link *link, TClass *tclass) { + int r; + + assert(link); + assert(tclass); + + r = set_ensure_put(&link->tclasses, &tclass_hash_ops, tclass); + if (r < 0) + return r; + if (r == 0) + return -EEXIST; + + tclass->link = link; + return 0; +} + +static int tclass_dup(const TClass *src, TClass **ret) { + _cleanup_(tclass_freep) TClass *dst = NULL; + + assert(src); + assert(ret); + + if (TCLASS_VTABLE(src)) + dst = memdup(src, TCLASS_VTABLE(src)->object_size); + else + dst = newdup(TClass, src, 1); + if (!dst) + return -ENOMEM; + + /* clear all pointers */ + dst->network = NULL; + dst->section = NULL; + dst->link = NULL; + dst->tca_kind = NULL; + + if (src->tca_kind) { + dst->tca_kind = strdup(src->tca_kind); + if (!dst->tca_kind) + return -ENOMEM; + } + + *ret = TAKE_PTR(dst); + return 0; +} + +int link_find_tclass(Link *link, uint32_t classid, TClass **ret) { + TClass *tclass; + + assert(link); + + SET_FOREACH(tclass, link->tclasses) { + if (tclass->classid != classid) + continue; + + if (!tclass_exists(tclass)) + continue; + + if (ret) + *ret = tclass; + return 0; + } + + return -ENOENT; +} + +static void log_tclass_debug(TClass *tclass, Link *link, const char *str) { + _cleanup_free_ char *state = NULL; + + assert(tclass); + assert(str); + + if (!DEBUG_LOGGING) + return; + + (void) network_config_state_to_string_alloc(tclass->state, &state); + + log_link_debug(link, "%s %s TClass (%s): classid=%"PRIx32":%"PRIx32", parent=%"PRIx32":%"PRIx32", kind=%s", + str, strna(network_config_source_to_string(tclass->source)), strna(state), + TC_H_MAJ(tclass->classid) >> 16, TC_H_MIN(tclass->classid), + TC_H_MAJ(tclass->parent) >> 16, TC_H_MIN(tclass->parent), + strna(tclass_get_tca_kind(tclass))); +} + +TClass* tclass_drop(TClass *tclass) { + QDisc *qdisc; + Link *link; + + assert(tclass); + + link = ASSERT_PTR(tclass->link); + + /* Also drop all child qdiscs assigned to the class. */ + SET_FOREACH(qdisc, link->qdiscs) { + if (qdisc->parent != tclass->classid) + continue; + + qdisc_drop(qdisc); + } + + tclass_enter_removed(tclass); + + if (tclass->state == 0) { + log_tclass_debug(tclass, link, "Forgetting"); + tclass = tclass_free(tclass); + } else + log_tclass_debug(tclass, link, "Removed"); + + return tclass; +} + +static int tclass_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, Link *link, TClass *tclass) { + int r; + + assert(m); + assert(link); + + r = sd_netlink_message_get_errno(m); + if (r < 0 && r != -EEXIST) { + log_link_message_error_errno(link, m, r, "Could not set TClass"); + link_enter_failed(link); + return 1; + } + + if (link->tc_messages == 0) { + log_link_debug(link, "Traffic control configured"); + link->tc_configured = true; + link_check_ready(link); + } + + return 1; +} + +static int tclass_configure(TClass *tclass, Link *link, Request *req) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + assert(tclass); + assert(link); + assert(link->manager); + assert(link->manager->rtnl); + assert(link->ifindex > 0); + assert(req); + + log_tclass_debug(tclass, link, "Configuring"); + + r = sd_rtnl_message_new_traffic_control(link->manager->rtnl, &m, RTM_NEWTCLASS, + link->ifindex, tclass->classid, tclass->parent); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, TCA_KIND, TCLASS_VTABLE(tclass)->tca_kind); + if (r < 0) + return r; + + if (TCLASS_VTABLE(tclass)->fill_message) { + r = TCLASS_VTABLE(tclass)->fill_message(link, tclass, m); + if (r < 0) + return r; + } + + return request_call_netlink_async(link->manager->rtnl, m, req); +} + +static bool tclass_is_ready_to_configure(TClass *tclass, Link *link) { + assert(tclass); + assert(link); + + if (!IN_SET(link->state, LINK_STATE_CONFIGURING, LINK_STATE_CONFIGURED)) + return false; + + return link_find_qdisc(link, TC_H_MAJ(tclass->classid), tclass_get_tca_kind(tclass), NULL) >= 0; +} + +static int tclass_process_request(Request *req, Link *link, TClass *tclass) { + int r; + + assert(req); + assert(link); + assert(tclass); + + if (!tclass_is_ready_to_configure(tclass, link)) + return 0; + + r = tclass_configure(tclass, link, req); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to configure TClass: %m"); + + tclass_enter_configuring(tclass); + return 1; +} + +int link_request_tclass(Link *link, TClass *tclass) { + TClass *existing; + int r; + + assert(link); + assert(tclass); + + if (tclass_get(link, tclass, &existing) < 0) { + _cleanup_(tclass_freep) TClass *tmp = NULL; + + r = tclass_dup(tclass, &tmp); + if (r < 0) + return log_oom(); + + r = tclass_add(link, tmp); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to store TClass: %m"); + + existing = TAKE_PTR(tmp); + } else + existing->source = tclass->source; + + log_tclass_debug(existing, link, "Requesting"); + r = link_queue_request_safe(link, REQUEST_TYPE_TC_CLASS, + existing, NULL, + tclass_hash_func, + tclass_compare_func, + tclass_process_request, + &link->tc_messages, + tclass_handler, + NULL); + if (r < 0) + return log_link_warning_errno(link, r, "Failed to request TClass: %m"); + if (r == 0) + return 0; + + tclass_enter_requesting(existing); + return 1; +} + +int manager_rtnl_process_tclass(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) { + _cleanup_(tclass_freep) TClass *tmp = NULL; + TClass *tclass = NULL; + Link *link; + uint16_t type; + int ifindex, r; + + assert(rtnl); + assert(message); + assert(m); + + if (sd_netlink_message_is_error(message)) { + r = sd_netlink_message_get_errno(message); + if (r < 0) + log_message_warning_errno(message, r, "rtnl: failed to receive TClass message, ignoring"); + + return 0; + } + + r = sd_netlink_message_get_type(message, &type); + if (r < 0) { + log_warning_errno(r, "rtnl: could not get message type, ignoring: %m"); + return 0; + } else if (!IN_SET(type, RTM_NEWTCLASS, RTM_DELTCLASS)) { + log_warning("rtnl: received unexpected message type %u when processing TClass, ignoring.", type); + return 0; + } + + r = sd_rtnl_message_traffic_control_get_ifindex(message, &ifindex); + if (r < 0) { + log_warning_errno(r, "rtnl: could not get ifindex from message, ignoring: %m"); + return 0; + } else if (ifindex <= 0) { + log_warning("rtnl: received TClass message with invalid ifindex %d, ignoring.", ifindex); + return 0; + } + + if (link_get_by_index(m, ifindex, &link) < 0) { + if (!m->enumerating) + log_warning("rtnl: received TClass for link '%d' we don't know about, ignoring.", ifindex); + return 0; + } + + r = tclass_new(_TCLASS_KIND_INVALID, &tmp); + if (r < 0) + return log_oom(); + + r = sd_rtnl_message_traffic_control_get_handle(message, &tmp->classid); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received TClass message without handle, ignoring: %m"); + return 0; + } + + r = sd_rtnl_message_traffic_control_get_parent(message, &tmp->parent); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received TClass message without parent, ignoring: %m"); + return 0; + } + + r = sd_netlink_message_read_string_strdup(message, TCA_KIND, &tmp->tca_kind); + if (r < 0) { + log_link_warning_errno(link, r, "rtnl: received TClass message without kind, ignoring: %m"); + return 0; + } + + (void) tclass_get(link, tmp, &tclass); + + switch (type) { + case RTM_NEWTCLASS: + if (tclass) { + tclass_enter_configured(tclass); + log_tclass_debug(tclass, link, "Received remembered"); + } else { + tclass_enter_configured(tmp); + log_tclass_debug(tmp, link, "Received new"); + + r = tclass_add(link, tmp); + if (r < 0) { + log_link_warning_errno(link, r, "Failed to remember TClass, ignoring: %m"); + return 0; + } + + tclass = TAKE_PTR(tmp); + } + + break; + + case RTM_DELTCLASS: + if (tclass) + (void) tclass_drop(tclass); + else + log_tclass_debug(tmp, link, "Kernel removed unknown"); + + break; + + default: + assert_not_reached(); + } + + return 1; +} + +int link_enumerate_tclass(Link *link, uint32_t parent) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; + int r; + + assert(link); + assert(link->manager); + assert(link->manager->rtnl); + + r = sd_rtnl_message_new_traffic_control(link->manager->rtnl, &req, RTM_GETTCLASS, link->ifindex, 0, parent); + if (r < 0) + return r; + + return manager_enumerate_internal(link->manager, link->manager->rtnl, req, manager_rtnl_process_tclass); +} + +static int tclass_section_verify(TClass *tclass) { + int r; + + assert(tclass); + + if (section_is_invalid(tclass->section)) + return -EINVAL; + + if (TCLASS_VTABLE(tclass)->verify) { + r = TCLASS_VTABLE(tclass)->verify(tclass); + if (r < 0) + return r; + } + + return 0; +} + +void network_drop_invalid_tclass(Network *network) { + TClass *tclass; + + assert(network); + + HASHMAP_FOREACH(tclass, network->tclasses_by_section) + if (tclass_section_verify(tclass) < 0) + tclass_free(tclass); +} + +int config_parse_tclass_parent( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(ltype, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + if (streq(rvalue, "root")) + tclass->parent = TC_H_ROOT; + else { + r = parse_handle(rvalue, &tclass->parent); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse 'Parent=', ignoring assignment: %s", + rvalue); + return 0; + } + } + + TAKE_PTR(tclass); + + return 0; +} + +int config_parse_tclass_classid( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL; + Network *network = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = tclass_new_static(ltype, network, filename, section_line, &tclass); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create traffic control class, ignoring assignment: %m"); + return 0; + } + + if (isempty(rvalue)) { + tclass->classid = TC_H_UNSPEC; + TAKE_PTR(tclass); + return 0; + } + + r = parse_handle(rvalue, &tclass->classid); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse 'ClassId=', ignoring assignment: %s", + rvalue); + return 0; + } + + TAKE_PTR(tclass); + + return 0; +} diff --git a/src/network/tc/tclass.h b/src/network/tc/tclass.h new file mode 100644 index 0000000..e73e23c --- /dev/null +++ b/src/network/tc/tclass.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * Copyright © 2019 VMware, Inc. */ +#pragma once + +#include "conf-parser.h" +#include "networkd-util.h" + +typedef struct Link Link; +typedef struct Manager Manager; +typedef struct Network Network; + +typedef enum TClassKind { + TCLASS_KIND_DRR, + TCLASS_KIND_HTB, + TCLASS_KIND_QFQ, + _TCLASS_KIND_MAX, + _TCLASS_KIND_INVALID = -EINVAL, +} TClassKind; + +typedef struct TClass { + Link *link; + Network *network; + ConfigSection *section; + NetworkConfigSource source; + NetworkConfigState state; + + uint32_t classid; + uint32_t parent; + + TClassKind kind; + char *tca_kind; +} TClass; + +typedef struct TClassVTable { + size_t object_size; + const char *tca_kind; + /* called in tclass_new() */ + int (*init)(TClass *tclass); + int (*fill_message)(Link *link, TClass *tclass, sd_netlink_message *m); + int (*verify)(TClass *tclass); +} TClassVTable; + +extern const TClassVTable * const tclass_vtable[_TCLASS_KIND_MAX]; + +#define TCLASS_VTABLE(t) ((t)->kind != _TCLASS_KIND_INVALID ? tclass_vtable[(t)->kind] : NULL) + +/* For casting a tclass into the various tclass kinds */ +#define DEFINE_TCLASS_CAST(UPPERCASE, MixedCase) \ + static inline MixedCase* TCLASS_TO_##UPPERCASE(TClass *t) { \ + if (_unlikely_(!t || t->kind != TCLASS_KIND_##UPPERCASE)) \ + return NULL; \ + \ + return (MixedCase*) t; \ + } + +DEFINE_NETWORK_CONFIG_STATE_FUNCTIONS(TClass, tclass); + +TClass* tclass_free(TClass *tclass); +int tclass_new_static(TClassKind kind, Network *network, const char *filename, unsigned section_line, TClass **ret); + +TClass* tclass_drop(TClass *tclass); + +int link_find_tclass(Link *link, uint32_t classid, TClass **ret); + +int link_request_tclass(Link *link, TClass *tclass); + +void network_drop_invalid_tclass(Network *network); + +int manager_rtnl_process_tclass(sd_netlink *rtnl, sd_netlink_message *message, Manager *m); +int link_enumerate_tclass(Link *link, uint32_t parent); + +DEFINE_SECTION_CLEANUP_FUNCTIONS(TClass, tclass_free); + +CONFIG_PARSER_PROTOTYPE(config_parse_tclass_parent); +CONFIG_PARSER_PROTOTYPE(config_parse_tclass_classid); + +#include "drr.h" +#include "htb.h" +#include "qfq.h" diff --git a/src/network/tc/teql.c b/src/network/tc/teql.c new file mode 100644 index 0000000..dcb149d --- /dev/null +++ b/src/network/tc/teql.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "macro.h" +#include "networkd-link.h" +#include "parse-util.h" +#include "string-util.h" +#include "teql.h" + +static int trivial_link_equalizer_verify(QDisc *qdisc) { + _cleanup_free_ char *tca_kind = NULL; + TrivialLinkEqualizer *teql; + + teql = TEQL(ASSERT_PTR(qdisc)); + + if (asprintf(&tca_kind, "teql%u", teql->id) < 0) + return log_oom(); + + return free_and_replace(qdisc->tca_kind, tca_kind); +} + +static int trivial_link_equalizer_is_ready(QDisc *qdisc, Link *link) { + Link *teql; + + assert(qdisc); + assert(qdisc->tca_kind); + assert(link); + assert(link->manager); + + if (link_get_by_name(link->manager, qdisc->tca_kind, &teql) < 0) + return false; + + return link_is_ready_to_configure(teql, /* allow_unmanaged = */ true); +} + +const QDiscVTable teql_vtable = { + .object_size = sizeof(TrivialLinkEqualizer), + .verify = trivial_link_equalizer_verify, + .is_ready = trivial_link_equalizer_is_ready, +}; + +int config_parse_trivial_link_equalizer_id( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL; + TrivialLinkEqualizer *teql; + Network *network = ASSERT_PTR(data); + unsigned id; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = qdisc_new_static(QDISC_KIND_TEQL, network, filename, section_line, &qdisc); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "More than one kind of queueing discipline, ignoring assignment: %m"); + return 0; + } + + teql = TEQL(qdisc); + + if (isempty(rvalue)) { + teql->id = 0; + + TAKE_PTR(qdisc); + return 0; + } + + r = safe_atou(rvalue, &id); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse '%s=', ignoring assignment: %s", + lvalue, rvalue); + return 0; + } + if (id > INT_MAX) + log_syntax(unit, LOG_WARNING, filename, line, 0, + "'%s=' is too large, ignoring assignment: %s", + lvalue, rvalue); + + teql->id = id; + + TAKE_PTR(qdisc); + return 0; +} diff --git a/src/network/tc/teql.h b/src/network/tc/teql.h new file mode 100644 index 0000000..8d0085e --- /dev/null +++ b/src/network/tc/teql.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "conf-parser.h" +#include "qdisc.h" + +typedef struct TrivialLinkEqualizer { + QDisc meta; + + unsigned id; +} TrivialLinkEqualizer; + +DEFINE_QDISC_CAST(TEQL, TrivialLinkEqualizer); +extern const QDiscVTable teql_vtable; + +CONFIG_PARSER_PROTOTYPE(config_parse_trivial_link_equalizer_id); |