From 9f0fc191371843c4fc000a226b0a26b6c059aacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:40:19 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- net/sched/act_api.c | 2 +- net/sched/act_ct.c | 41 +++-- net/sched/act_gate.c | 1 + net/sched/act_mirred.c | 147 +++++++++-------- net/sched/cls_api.c | 35 ++++- net/sched/cls_basic.c | 1 + net/sched/cls_cgroup.c | 1 + net/sched/cls_flower.c | 5 +- net/sched/cls_fw.c | 1 + net/sched/cls_route.c | 38 +++-- net/sched/cls_u32.c | 1 + net/sched/em_meta.c | 2 +- net/sched/sch_cbs.c | 1 + net/sched/sch_choke.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_etf.c | 1 + net/sched/sch_ets.c | 1 + net/sched/sch_fifo.c | 1 + net/sched/sch_fq.c | 385 +++++++++++++++++++++++++++++++++++---------- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_frag.c | 4 +- net/sched/sch_generic.c | 9 +- net/sched/sch_gred.c | 1 + net/sched/sch_hfsc.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_ingress.c | 1 + net/sched/sch_mqprio.c | 1 + net/sched/sch_mqprio_lib.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_netem.c | 3 +- net/sched/sch_plug.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_qfq.c | 5 +- net/sched/sch_red.c | 1 + net/sched/sch_sfq.c | 1 + net/sched/sch_skbprio.c | 1 + net/sched/sch_taprio.c | 3 +- net/sched/sch_tbf.c | 1 + net/sched/sch_teql.c | 1 + 39 files changed, 492 insertions(+), 214 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d3f26bf04..c39252d61e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1098,7 +1098,7 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - net_warn_ratelimited("can't go to NULL chain!\n"); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a7b3f60dd0..3d50215985 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -734,7 +734,6 @@ static struct tc_action_ops act_ct_ops; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ - bool labels; }; /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ @@ -872,8 +871,13 @@ static void tcf_ct_params_free(struct tcf_ct_params *params) } if (params->ct_ft) tcf_ct_flow_table_put(params->ct_ft); - if (params->tmpl) + if (params->tmpl) { + if (params->put_labels) + nf_connlabels_put(nf_ct_net(params->tmpl)); + nf_ct_put(params->tmpl); + } + kfree(params); } @@ -1198,9 +1202,9 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; int err, family, proto, len; + bool put_labels = false; struct nf_conn *tmpl; char *name; @@ -1230,15 +1234,20 @@ static int tcf_ct_fill_params(struct net *net, } if (tb[TCA_CT_LABELS]) { + unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); return -EOPNOTSUPP; } - if (!tn->labels) { + if (nf_connlabels_get(net, n_bits - 1)) { NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); return -EOPNOTSUPP; + } else { + put_labels = true; } + tcf_ct_set_key_val(tb, p->labels, TCA_CT_LABELS, p->labels_mask, TCA_CT_LABELS_MASK, @@ -1282,10 +1291,15 @@ static int tcf_ct_fill_params(struct net *net, } } + p->put_labels = put_labels; + if (p->ct_action & TCA_CT_ACT_COMMIT) __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; err: + if (put_labels) + nf_connlabels_put(net); + nf_ct_put(p->tmpl); p->tmpl = NULL; return err; @@ -1589,32 +1603,13 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { - unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - if (nf_connlabels_get(net, n_bits - 1)) { - tn->labels = false; - pr_err("act_ct: Failed to set connlabels length"); - } else { - tn->labels = true; - } - return tc_action_net_init(net, &tn->tn, &act_ct_ops); } static void __net_exit ct_exit_net(struct list_head *net_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - - if (tn->labels) - nf_connlabels_put(net); - } - rtnl_unlock(); - tc_action_net_exit(net_list, act_ct_ops.net_id); } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c9a811f4c7..393b787292 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void) module_init(gate_init_module); module_exit(gate_cleanup_module); +MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c..674f7ae356 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -206,18 +206,14 @@ release_idr: return err; } -static bool is_mirred_nested(void) -{ - return unlikely(__this_cpu_read(mirred_nest_level) > 1); -} - -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +static int +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (is_mirred_nested()) + else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); @@ -225,110 +221,123 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); - goto out; + goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - goto out; + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) + goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); - - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; + + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } + if (err) + tcf_action_inc_overlimit_qstats(&m->common); + + return retval; + +err_cant_do: + if (is_redirect) + retval = TC_ACT_SHOT; + tcf_action_inc_overlimit_qstats(&m->common); + return retval; +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; - err = tcf_mirred_forward(want_ingress, skb2); - if (err) { -out: + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) - retval = TC_ACT_SHOT; + goto dec_nest_level; } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 84e18b5f72..02c594baa1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1664,6 +1664,7 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { + u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1687,12 +1688,16 @@ reclassify: * time we got here with a cookie from hardware. */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || - !tp->ops->get_exts)) + !tp->ops->get_exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } exts = tp->ops->get_exts(tp, n->handle); - if (unlikely(!exts || n->exts != exts)) + if (unlikely(!exts || n->exts != exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); @@ -1714,12 +1719,20 @@ reclassify: goto reset; } #endif - if (err >= 0) + if (err >= 0) { + /* Policy drop or drop reason is over-written by + * classifiers with a bogus value(0) */ + if (err == TC_ACT_SHOT && + res->drop_reason == SKB_NOT_DROPPED_YET) + tcf_set_drop_reason(res, orig_reason); return err; + } } - if (unlikely(n)) + if (unlikely(n)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT @@ -1729,6 +1742,7 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } @@ -1765,8 +1779,10 @@ int tcf_classify(struct sk_buff *skb, if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); - if (!n) + if (!n) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } chain = n->chain_index; } else { @@ -1774,8 +1790,10 @@ int tcf_classify(struct sk_buff *skb, } fchain = tcf_chain_lookup_rcu(block, chain); - if (!fchain) + if (!fchain) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); @@ -1794,8 +1812,11 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) + if (WARN_ON_ONCE(!ext)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } + ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 1b92c33b5f..a1f5693133 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -341,4 +341,5 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) +MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index bd9322d719..7ee8dbf49e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void) module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); +MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b..6ee7064c82 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ unbind_filter: } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c49d6af0e0..afc534ee0a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -446,4 +446,5 @@ static void __exit exit_fw(void) module_init(init_fw) module_exit(exit_fw) +MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1e20bbd687..12a505db41 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -375,9 +375,9 @@ out: static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 }, - [TCA_ROUTE4_TO] = { .type = NLA_U32 }, - [TCA_ROUTE4_FROM] = { .type = NLA_U32 }, - [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, + [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF), }; static int route4_set_parms(struct net *net, struct tcf_proto *tp, @@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_ROUTE4_TO]) { - if (new && handle & 0x8000) + if (new && handle & 0x8000) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } to = nla_get_u32(tb[TCA_ROUTE4_TO]); - if (to > 0xFF) - return -EINVAL; nhandle = to; } + if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM], + "'from' and 'fromif' are mutually exclusive"); + return -EINVAL; + } + if (tb[TCA_ROUTE4_FROM]) { - if (tb[TCA_ROUTE4_IIF]) - return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); - if (id > 0xFF) - return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); - if (id > 0x7FFF) - return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; - if (nhandle != handle) + if (nhandle != handle) { + NL_SET_ERR_MSG_FMT(extack, + "Handle mismatch constructed: %x (expected: %x)", + handle, nhandle); return -EINVAL; + } } if (!nhandle) { @@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct route4_filter __rcu **fp; struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; - struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; int err; @@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (opt == NULL) + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing options"); return -EINVAL; + } - err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS], route4_policy, NULL); if (err < 0) return err; @@ -679,4 +684,5 @@ static void __exit exit_route4(void) module_init(init_route4) module_exit(exit_route4) +MODULE_DESCRIPTION("Routing table realm based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6663e971a1..d5bdfd4a76 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1489,4 +1489,5 @@ static void __exit exit_u32(void) module_init(init_u32) module_exit(exit_u32) +MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index da34fd4c92..09d8afd04a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio) *err = -1; return; } - dst->value = sk->sk_priority; + dst->value = READ_ONCE(sk->sk_priority); } META_COLLECTOR(int_sk_rcvlowat) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cac870eb78..9a0b85190a 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void) module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Credit Based shaper"); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 19c8511259..ae1da08e26 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -513,3 +513,4 @@ module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd..097740a9af 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -495,3 +495,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Deficit Round Robin scheduler"); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 61d1f0e32c..4808159a54 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -513,3 +513,4 @@ static void __exit etf_module_exit(void) module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf06..f7c8849594 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -826,3 +826,4 @@ static void __exit ets_exit(void) module_init(ets_init); module_exit(ets_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e1040421b7..450f5c67ac 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); +MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f59a2cb2c8..3a31c47fea 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -2,7 +2,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013-2015 Eric Dumazet + * Copyright (C) 2013-2023 Eric Dumazet * * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. @@ -51,7 +51,8 @@ #include struct fq_skb_cb { - u64 time_to_send; + u64 time_to_send; + u8 band; }; static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) @@ -73,37 +74,41 @@ struct fq_flow { struct sk_buff *tail; /* last skb in the list */ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; - struct rb_node fq_node; /* anchor in fq_root[] trees */ + union { + struct rb_node fq_node; /* anchor in fq_root[] trees */ + /* Following field is only used for q->internal, + * because q->internal is not hashed in fq_root[] + */ + u64 stat_fastpath_packets; + }; struct sock *sk; u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ -/* Second cache line, used in fq_dequeue() */ +/* Second cache line */ int credit; - /* 32bit hole on 64bit arches */ - + int band; struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; -} ____cacheline_aligned_in_smp; +}; struct fq_flow_head { struct fq_flow *first; struct fq_flow *last; }; -struct fq_sched_data { +struct fq_perband_flows { struct fq_flow_head new_flows; - struct fq_flow_head old_flows; + int credit; + int quantum; /* based on band nr : 576KB, 192KB, 64KB */ +}; - struct rb_root delayed; /* for rate limited flows */ - u64 time_next_delayed_flow; - u64 ktime_cache; /* copy of last ktime_get_ns() */ - unsigned long unthrottle_latency_ns; +struct fq_sched_data { +/* Read mostly cache line */ - struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; u32 initial_quantum; u32 flow_refill_delay; @@ -117,24 +122,46 @@ struct fq_sched_data { u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; + u8 prio2band[(TC_PRIO_MAX + 1) >> 2]; + u32 timer_slack; /* hrtimer slack in ns */ + +/* Read/Write fields. */ + + unsigned int band_nr; /* band being serviced in fq_dequeue() */ + + struct fq_perband_flows band_flows[FQ_BANDS]; + + struct fq_flow internal; /* fastpath queue. */ + struct rb_root delayed; /* for rate limited flows */ + u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; + + u32 band_pkt_count[FQ_BANDS]; u32 flows; - u32 inactive_flows; + u32 inactive_flows; /* Flows with no packet to send. */ u32 throttled_flows; - u64 stat_gc_flows; - u64 stat_internal_packets; u64 stat_throttled; + struct qdisc_watchdog watchdog; + u64 stat_gc_flows; + +/* Seldom used fields. */ + + u64 stat_band_drops[FQ_BANDS]; u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; - - u32 timer_slack; /* hrtimer slack in ns */ - struct qdisc_watchdog watchdog; }; +/* return the i-th 2-bit value ("crumb") */ +static u8 fq_prio2band(const u8 *prio2band, unsigned int prio) +{ + return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3; +} + /* * f->tail and f->age share the same location. * We can use the low order bit to differentiate if this location points @@ -159,8 +186,19 @@ static bool fq_flow_is_throttled(const struct fq_flow *f) return f->next == &throttled; } -static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +enum new_flow { + NEW_FLOW, + OLD_FLOW +}; + +static void fq_flow_add_tail(struct fq_sched_data *q, struct fq_flow *flow, + enum new_flow list_sel) { + struct fq_perband_flows *pband = &q->band_flows[flow->band]; + struct fq_flow_head *head = (list_sel == NEW_FLOW) ? + &pband->new_flows : + &pband->old_flows; + if (head->first) head->last->next = flow; else @@ -173,7 +211,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) { rb_erase(&f->rate_node, &q->delayed); q->throttled_flows--; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); } static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) @@ -258,17 +296,61 @@ static void fq_gc(struct fq_sched_data *q, kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree); } -static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) +/* Fast path can be used if : + * 1) Packet tstamp is in the past. + * 2) FQ qlen == 0 OR + * (no flow is currently eligible for transmit, + * AND fast path queue has less than 8 packets) + * 3) No SO_MAX_PACING_RATE on the socket (if any). + * 4) No @maxrate attribute on this qdisc, + * + * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure. + */ +static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, + u64 now) { + const struct fq_sched_data *q = qdisc_priv(sch); + const struct sock *sk; + + if (fq_skb_cb(skb)->time_to_send > now) + return false; + + if (sch->q.qlen != 0) { + /* Even if some packets are stored in this qdisc, + * we can still enable fast path if all of them are + * scheduled in the future (ie no flows are eligible) + * or in the fast path queue. + */ + if (q->flows != q->inactive_flows + q->throttled_flows) + return false; + + /* Do not allow fast path queue to explode, we want Fair Queue mode + * under pressure. + */ + if (q->internal.qlen >= 8) + return false; + } + + sk = skb->sk; + if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) && + sk->sk_max_pacing_rate != ~0UL) + return false; + + if (q->flow_max_rate != ~0UL) + return false; + + return true; +} + +static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, + u64 now) +{ + struct fq_sched_data *q = qdisc_priv(sch); struct rb_node **p, *parent; struct sock *sk = skb->sk; struct rb_root *root; struct fq_flow *f; - /* warning: no starvation prevention... */ - if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) - return &q->internal; - /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket * or a listener (SYNCOOKIE mode) * 1) request sockets are not full blown, @@ -299,11 +381,18 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) sk = (struct sock *)((hash << 1) | 1UL); } + if (fq_fastpath_check(sch, skb, now)) { + q->internal.stat_fastpath_packets++; + if (skb->sk == sk && q->rate_enable && + READ_ONCE(sk->sk_pacing_status) != SK_PACING_FQ) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + return &q->internal; + } + root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; - if (q->flows >= (2U << q->fq_trees_log) && - q->inactive_flows > q->flows/2) - fq_gc(q, root, sk); + fq_gc(q, root, sk); p = &root->rb_node; parent = NULL; @@ -396,7 +485,6 @@ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, { fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); - flow->qlen--; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } @@ -434,9 +522,9 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) } static bool fq_packet_beyond_horizon(const struct sk_buff *skb, - const struct fq_sched_data *q) + const struct fq_sched_data *q, u64 now) { - return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); + return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -444,53 +532,57 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; + u64 now; + u8 band; - if (unlikely(sch->q.qlen >= sch->limit)) + band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); + if (unlikely(q->band_pkt_count[band] >= sch->limit)) { + q->stat_band_drops[band]++; return qdisc_drop(skb, sch, to_free); + } + now = ktime_get_ns(); if (!skb->tstamp) { - fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); + fq_skb_cb(skb)->time_to_send = now; } else { - /* Check if packet timestamp is too far in the future. - * Try first if our cached value, to avoid ktime_get_ns() - * cost in most cases. - */ - if (fq_packet_beyond_horizon(skb, q)) { - /* Refresh our cache and check another time */ - q->ktime_cache = ktime_get_ns(); - if (fq_packet_beyond_horizon(skb, q)) { - if (q->horizon_drop) { + /* Check if packet timestamp is too far in the future. */ + if (fq_packet_beyond_horizon(skb, q, now)) { + if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop(skb, sch, to_free); - } - q->stat_horizon_caps++; - skb->tstamp = q->ktime_cache + q->horizon; } + q->stat_horizon_caps++; + skb->tstamp = now + q->horizon; } fq_skb_cb(skb)->time_to_send = skb->tstamp; } - f = fq_classify(skb, q); - if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { - q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); - } + f = fq_classify(sch, skb, now); - f->qlen++; - qdisc_qstats_backlog_inc(sch, skb); - if (fq_flow_is_detached(f)) { - fq_flow_add_tail(&q->new_flows, f); - if (time_after(jiffies, f->age + q->flow_refill_delay)) - f->credit = max_t(u32, f->credit, q->quantum); - q->inactive_flows--; + if (f != &q->internal) { + if (unlikely(f->qlen >= q->flow_plimit)) { + q->stat_flows_plimit++; + return qdisc_drop(skb, sch, to_free); + } + + if (fq_flow_is_detached(f)) { + fq_flow_add_tail(q, f, NEW_FLOW); + if (time_after(jiffies, f->age + q->flow_refill_delay)) + f->credit = max_t(u32, f->credit, q->quantum); + } + + f->band = band; + q->band_pkt_count[band]++; + fq_skb_cb(skb)->band = band; + if (f->qlen == 0) + q->inactive_flows--; } + f->qlen++; /* Note: this overwrites f->age */ flow_queue_add(f, skb); - if (unlikely(f == &q->internal)) { - q->stat_internal_packets++; - } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -523,13 +615,26 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) } } +static struct fq_flow_head *fq_pband_head_select(struct fq_perband_flows *pband) +{ + if (pband->credit <= 0) + return NULL; + + if (pband->new_flows.first) + return &pband->new_flows; + + return pband->old_flows.first ? &pband->old_flows : NULL; +} + static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); + struct fq_perband_flows *pband; struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; unsigned long rate; + int retry; u32 plen; u64 now; @@ -538,30 +643,38 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) skb = fq_peek(&q->internal); if (unlikely(skb)) { + q->internal.qlen--; fq_dequeue_skb(sch, &q->internal, skb); goto out; } - q->ktime_cache = now = ktime_get_ns(); + now = ktime_get_ns(); fq_check_throttled(q, now); + retry = 0; + pband = &q->band_flows[q->band_nr]; begin: - head = &q->new_flows; - if (!head->first) { - head = &q->old_flows; - if (!head->first) { - if (q->time_next_delayed_flow != ~0ULL) - qdisc_watchdog_schedule_range_ns(&q->watchdog, + head = fq_pband_head_select(pband); + if (!head) { + while (++retry <= FQ_BANDS) { + if (++q->band_nr == FQ_BANDS) + q->band_nr = 0; + pband = &q->band_flows[q->band_nr]; + pband->credit = min(pband->credit + pband->quantum, + pband->quantum); + goto begin; + } + if (q->time_next_delayed_flow != ~0ULL) + qdisc_watchdog_schedule_range_ns(&q->watchdog, q->time_next_delayed_flow, q->timer_slack); - return NULL; - } + return NULL; } f = head->first; - + retry = 0; if (f->credit <= 0) { f->credit += q->quantum; head->first = f->next; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); goto begin; } @@ -581,20 +694,23 @@ begin: INET_ECN_set_ce(skb); q->stat_ce_mark++; } + if (--f->qlen == 0) + q->inactive_flows++; + q->band_pkt_count[fq_skb_cb(skb)->band]--; fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ - if ((head == &q->new_flows) && q->old_flows.first) { - fq_flow_add_tail(&q->old_flows, f); + if (head == &pband->new_flows) { + fq_flow_add_tail(q, f, OLD_FLOW); } else { fq_flow_set_detached(f); - q->inactive_flows++; } goto begin; } plen = qdisc_pkt_len(skb); f->credit -= plen; + pband->credit -= plen; if (!q->rate_enable) goto out; @@ -607,7 +723,7 @@ begin: */ if (!skb->tstamp) { if (skb->sk) - rate = min(skb->sk->sk_pacing_rate, rate); + rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate); if (rate <= q->low_rate_threshold) { f->credit = 0; @@ -686,8 +802,10 @@ static void fq_reset(struct Qdisc *sch) kmem_cache_free(fq_flow_cachep, f); } } - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (idx = 0; idx < FQ_BANDS; idx++) { + q->band_flows[idx].new_flows.first = NULL; + q->band_flows[idx].old_flows.first = NULL; + } q->delayed = RB_ROOT; q->flows = 0; q->inactive_flows = 0; @@ -779,7 +897,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; } -static struct netlink_range_validation iq_range = { +static const struct netlink_range_validation iq_range = { .max = INT_MAX, }; @@ -801,8 +919,71 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, + [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), + [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), }; +/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ +static void fq_prio2band_compress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + memset(out, 0, num_elems / 4); + for (i = 0; i < num_elems; i++) + out[i / 4] |= in[i] << (2 * (i & 0x3)); +} + +static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + for (i = 0; i < num_elems; i++) + out[i] = fq_prio2band(in, i); +} + +static int fq_load_weights(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + s32 *weights = nla_data(attr); + int i; + + for (i = 0; i < FQ_BANDS; i++) { + if (weights[i] < FQ_MIN_WEIGHT) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Weight %d less that minimum allowed %d", + weights[i], FQ_MIN_WEIGHT); + return -EINVAL; + } + } + for (i = 0; i < FQ_BANDS; i++) + q->band_flows[i].quantum = weights[i]; + return 0; +} + +static int fq_load_priomap(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const struct tc_prio_qopt *map = nla_data(attr); + int i; + + if (map->bands != FQ_BANDS) { + NL_SET_ERR_MSG_MOD(extack, "FQ only supports 3 bands"); + return -EINVAL; + } + for (i = 0; i < TC_PRIO_MAX + 1; i++) { + if (map->priomap[i] >= FQ_BANDS) { + NL_SET_ERR_MSG_FMT_MOD(extack, "FQ priomap field %d maps to a too high band %d", + i, map->priomap[i]); + return -EINVAL; + } + } + fq_prio2band_compress_crumb(map->priomap, q->prio2band); + return 0; +} + static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -877,6 +1058,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (!err && tb[TCA_FQ_PRIOMAP]) + err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack); + + if (!err && tb[TCA_FQ_WEIGHTS]) + err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack); + if (tb[TCA_FQ_ORPHAN_MASK]) q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); @@ -928,7 +1115,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); - int err; + int i, err; sch->limit = 10000; q->flow_plimit = 100; @@ -938,8 +1125,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->flow_max_rate = ~0UL; q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (i = 0; i < FQ_BANDS; i++) { + q->band_flows[i].new_flows.first = NULL; + q->band_flows[i].old_flows.first = NULL; + } + q->band_flows[0].quantum = 9 << 16; + q->band_flows[1].quantum = 3 << 16; + q->band_flows[2].quantum = 1 << 16; q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); @@ -954,6 +1146,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; + fq_prio2band_compress_crumb(sch_default_prio2band, q->prio2band); qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) @@ -968,8 +1161,12 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); u64 ce_threshold = q->ce_threshold; + struct tc_prio_qopt prio = { + .bands = FQ_BANDS, + }; u64 horizon = q->horizon; struct nlattr *opts; + s32 weights[3]; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) @@ -999,6 +1196,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) goto nla_put_failure; + fq_prio2band_decompress_crumb(q->prio2band, prio.priomap); + if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) + goto nla_put_failure; + + weights[0] = q->band_flows[0].quantum; + weights[1] = q->band_flows[1].quantum; + weights[2] = q->band_flows[2].quantum; + if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights)) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -1009,11 +1216,15 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); struct tc_fq_qd_stats st; + int i; + + st.pad = 0; sch_tree_lock(sch); st.gc_flows = q->stat_gc_flows; - st.highprio_packets = q->stat_internal_packets; + st.highprio_packets = 0; + st.fastpath_packets = q->internal.stat_fastpath_packets; st.tcp_retrans = 0; st.throttled = q->stat_throttled; st.flows_plimit = q->stat_flows_plimit; @@ -1029,6 +1240,10 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.ce_mark = q->stat_ce_mark; st.horizon_drops = q->stat_horizon_drops; st.horizon_caps = q->stat_horizon_caps; + for (i = 0; i < FQ_BANDS; i++) { + st.band_drops[i] = q->stat_band_drops[i]; + st.band_pkt_count[i] = q->band_pkt_count[i]; + } sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); @@ -1056,7 +1271,7 @@ static int __init fq_module_init(void) fq_flow_cachep = kmem_cache_create("fq_flow_cache", sizeof(struct fq_flow), - 0, 0, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!fq_flow_cachep) return -ENOMEM; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 68e6acd0f1..5b595773e5 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -202,7 +202,7 @@ out: return NET_XMIT_CN; } -static struct netlink_range_validation fq_pie_q_range = { +static const struct netlink_range_validation fq_pie_q_range = { .min = 1, .max = 1 << 20, }; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index a9bd0a2358..ce63414185 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, unsigned long orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; @@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d7e23f4cc..4195a4bc26 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -694,9 +694,10 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static const u8 prio2band[TC_PRIO_MAX + 1] = { - 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = { + 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; +EXPORT_SYMBOL(sch_default_prio2band); /* 3-band FIFO queue: old style, but should be a bit faster than generic prio+fifo combination. @@ -721,7 +722,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - int band = prio2band[skb->priority & TC_PRIO_MAX]; + int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); unsigned int pkt_len = qdisc_pkt_len(skb); @@ -830,7 +831,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); + memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9d..8c61eb3dc9 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -945,3 +945,4 @@ module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 880c5f16b2..16c45da403 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1693,5 +1693,6 @@ hfsc_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0d947414e6..7349233eaa 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a463a63192..5fa9eaa79b 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,3 +370,4 @@ module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 793009f445..43e53ee00a 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -789,3 +789,4 @@ module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c index 83b3793c40..b3a5572c16 100644 --- a/net/sched/sch_mqprio_lib.c +++ b/net/sched/sch_mqprio_lib.c @@ -129,3 +129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE], EXPORT_SYMBOL_GPL(mqprio_fp_to_offload); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio"); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c86018..d66d5f0ec0 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -410,3 +410,4 @@ module_init(multiq_module_init) module_exit(multiq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4ad39a4a3c..fa678eb885 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -67,7 +67,7 @@ struct disttable { u32 size; - s16 table[]; + s16 table[] __counted_by(size); }; struct netem_sched_data { @@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void) module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network characteristics emulator qdisc"); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 35f49edf63..992f0c8d79 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -226,3 +226,4 @@ static void __exit plug_module_exit(void) module_init(plug_module_init) module_exit(plug_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fdc5ef52c3..8ecdd3ef6f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -433,3 +433,4 @@ module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple 3-band priority qdisc"); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 546c10adca..48a604c320 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -213,7 +213,7 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static struct netlink_range_validation lmax_range = { +static const struct netlink_range_validation lmax_range = { .min = QFQ_MIN_LMAX, .max = QFQ_MAX_LMAX, }; @@ -1003,7 +1003,7 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, *cl = list_first_entry(&agg->active, struct qfq_class, alist); skb = (*cl)->qdisc->ops->peek((*cl)->qdisc); if (skb == NULL) - WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc); else *len = qdisc_pkt_len(skb); @@ -1535,3 +1535,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16277b6a02..607b6c8b3a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -563,3 +563,4 @@ module_init(red_module_init) module_exit(red_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Random Early Detection qdisc"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66dcb18638..eb77558fa3 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void) module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Stochastic Fairness qdisc"); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 5df2dacb7b..28beb11762 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -307,3 +307,4 @@ module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1cb5e41c0e..31a8252bd0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1015,7 +1015,7 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { TC_FP_PREEMPTIBLE), }; -static struct netlink_range_validation_signed taprio_cycle_time_range = { +static const struct netlink_range_validation_signed taprio_cycle_time_range = { .min = 0, .max = INT_MAX, }; @@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Time Aware Priority qdisc"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 17d2d00ddb..dd6b1a723b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void) module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Token Bucket Filter qdisc"); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7721239c18..59304611dc 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -523,3 +523,4 @@ module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); -- cgit v1.2.3