Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_api.c        |   3
-rw-r--r-- | net/sched/act_ct.c         |  26
-rw-r--r-- | net/sched/act_tunnel_key.c |  36
-rw-r--r-- | net/sched/cls_api.c        |  41
-rw-r--r-- | net/sched/cls_flower.c     | 134
-rw-r--r-- | net/sched/sch_api.c        |   3
-rw-r--r-- | net/sched/sch_cake.c       | 112
-rw-r--r-- | net/sched/sch_cbs.c        |  20
-rw-r--r-- | net/sched/sch_choke.c      |  21
-rw-r--r-- | net/sched/sch_codel.c      |  29
-rw-r--r-- | net/sched/sch_etf.c        |  10
-rw-r--r-- | net/sched/sch_ets.c        |  25
-rw-r--r-- | net/sched/sch_fifo.c       |  13
-rw-r--r-- | net/sched/sch_fq.c         | 108
-rw-r--r-- | net/sched/sch_fq_codel.c   |  57
-rw-r--r-- | net/sched/sch_fq_pie.c     |  61
-rw-r--r-- | net/sched/sch_generic.c    |  16
-rw-r--r-- | net/sched/sch_hfsc.c       |   9
-rw-r--r-- | net/sched/sch_hhf.c        |  35
-rw-r--r-- | net/sched/sch_htb.c        |  22
-rw-r--r-- | net/sched/sch_ingress.c    |  12
-rw-r--r-- | net/sched/sch_mqprio.c     |   6
-rw-r--r-- | net/sched/sch_multiq.c     |   2
-rw-r--r-- | net/sched/sch_pie.c        |  39
-rw-r--r-- | net/sched/sch_sfq.c        |  13
-rw-r--r-- | net/sched/sch_skbprio.c    |   8
-rw-r--r-- | net/sched/sch_taprio.c     |  34
-rw-r--r-- | net/sched/sch_teql.c       |   4
28 files changed, 572 insertions(+), 327 deletions(-)
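
Much of the churn in the sch_* files below converts qdisc parameter fields to WRITE_ONCE()/READ_ONCE() pairs so that dump operations can read the configuration without holding the qdisc lock. The sketch below illustrates that pattern only and is not code from the patch; struct foo_sched_data, its limit field, and the TCA_FOO_LIMIT attribute are invented for the example.

/* Sketch only: assumes the usual qdisc boilerplate around it
 * (net/pkt_sched.h, a registered Qdisc_ops using these callbacks).
 */
struct foo_sched_data {
	u32 limit;	/* example tunable read by the lockless dump path */
};

static int foo_change(struct Qdisc *sch, struct nlattr **tb)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* Writer side: publish the new value with WRITE_ONCE() so a
	 * concurrent lockless reader sees either the old or the new
	 * value, never a torn one.
	 */
	if (tb[TCA_FOO_LIMIT])
		WRITE_ONCE(q->limit, nla_get_u32(tb[TCA_FOO_LIMIT]));
	return 0;
}

static int foo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* Reader side: the dump no longer needs the qdisc lock; pair the
	 * access with READ_ONCE() to match the writer above.
	 */
	if (nla_put_u32(skb, TCA_FOO_LIMIT, READ_ONCE(q->limit)))
		return -EMSGSIZE;
	return skb->len;
}

This is the same annotation pattern applied throughout sch_cake, sch_cbs, sch_codel, sch_fifo, sch_fq, sch_fq_codel, sch_fq_pie, sch_hhf and the other schedulers in the diff that follows.
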
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb11..2520708b06 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, u32 max; if (*index) { -again: rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); @@ -839,7 +838,7 @@ again: * index but did not assign the pointer yet. */ rcu_read_unlock(); - goto again; + return -EAGAIN; } if (!p) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index baac083fd8..9d451d77d5 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -41,21 +41,28 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; static DEFINE_MUTEX(zones_mutex); +struct zones_ht_key { + struct net *net; + u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; +}; + struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; refcount_t ref; - u16 zone; + struct zones_ht_key key; bool dying; }; static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), - .key_offset = offsetof(struct tcf_ct_flow_table, zone), - .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .key_offset = offsetof(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; @@ -316,11 +323,12 @@ static struct nf_flowtable_type flowtable_ct = { static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { + struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) goto out_unlock; @@ -329,7 +337,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) goto err_alloc; refcount_set(&ct_ft->ref, 1); - ct_ft->zone = params->zone; + ct_ft->key = key; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); if (err) goto err_insert; @@ -1071,6 +1079,14 @@ do_nat: */ if (nf_conntrack_confirm(skb) != NF_ACCEPT) goto drop; + + /* The ct may be dropped if a clash has been resolved, + * so it's necessary to retrieve it from skb again to + * prevent UAF. 
+ */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + skip_add = true; } if (!skip_add) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 1536f8b16f..af7c998459 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -230,7 +230,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, nla_for_each_attr(attr, head, len, rem) { switch (nla_type(attr)) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) { + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } @@ -247,7 +247,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, dst_len -= opt_len; dst += opt_len; } - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: if (type) { @@ -259,7 +259,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: if (type) { @@ -271,7 +271,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; } } @@ -302,7 +302,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, switch (nla_type(nla_data(nla))) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -310,7 +310,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -318,7 +318,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -363,6 +363,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *metadata = NULL; struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; @@ -371,7 +372,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, __be16 dst_port = 0; __be64 key_id = 0; int opts_len = 0; - __be16 flags = 0; u8 tos, ttl; int ret = 0; u32 index; @@ -412,16 +412,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); key_id = key32_to_tunnel_id(key32); - flags = TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, flags); } - flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, flags); if (tb[TCA_TUNNEL_KEY_NO_CSUM] && nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) - flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, flags); if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG])) - flags |= TUNNEL_DONT_FRAGMENT; + 
__set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags); if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); @@ -663,15 +663,15 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!start) return -EMSGSIZE; - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_geneve_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_vxlan_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_erspan_opts_dump(skb, info); if (err) goto err_out; @@ -741,7 +741,7 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); - if (((key->tun_flags & TUNNEL_KEY) && + if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) && nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) || tunnel_key_dump_addresses(skb, ¶ms->tcft_enc_metadata->u.tun_info) || @@ -749,8 +749,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, - !(key->tun_flags & TUNNEL_CSUM)) || - ((key->tun_flags & TUNNEL_DONT_FRAGMENT) && + !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) || + (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) && nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ca5676b266..17d97bbe89 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp) refcount_inc(&tp->refcnt); } +static void tcf_maintain_bypass(struct tcf_block *block) +{ + int filtercnt = atomic_read(&block->filtercnt); + int skipswcnt = atomic_read(&block->skipswcnt); + bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt; + + if (bypass_wanted != block->bypass_wanted) { +#ifdef CONFIG_NET_CLS_ACT + if (bypass_wanted) + static_branch_inc(&tcf_bypass_check_needed_key); + else + static_branch_dec(&tcf_bypass_check_needed_key); +#endif + block->bypass_wanted = bypass_wanted; + } +} + +static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add) +{ + lockdep_assert_not_held(&block->cb_lock); + + down_write(&block->cb_lock); + if (*counted != add) { + if (add) { + atomic_inc(&block->filtercnt); + *counted = true; + } else { + atomic_dec(&block->filtercnt); + *counted = false; + } + } + tcf_maintain_bypass(block); + up_write(&block->cb_lock); +} + static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); + tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); @@ -2367,6 +2403,7 @@ replay: tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); + tcf_block_filter_cnt_update(block, &tp->counted, true); /* q pointer is NULL for shared blocks */ if (q) q->flags &= ~TCQ_F_CAN_BYPASS; @@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct 
tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(*flags)) + atomic_inc(&block->skipswcnt); atomic_inc(&block->offloadcnt); } @@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(*flags)) + atomic_dec(&block->skipswcnt); atomic_dec(&block->offloadcnt); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e1314674b4..fd9a6f20b6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -28,6 +28,7 @@ #include <net/vxlan.h> #include <net/erspan.h> #include <net/gtp.h> +#include <net/pfcp.h> #include <net/tc_wrapper.h> #include <net/dst.h> @@ -741,6 +742,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_PFCP] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -771,6 +773,12 @@ gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = { }; static const struct nla_policy +pfcp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID] = { .type = NLA_U64 }, +}; + +static const struct nla_policy mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 }, @@ -1419,6 +1427,44 @@ static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key, return sizeof(*sinfo); } +static int fl_set_pfcp_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1]; + struct pfcp_metadata *md; + int err; + + md = (struct pfcp_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_PFCP) { + NL_SET_ERR_MSG_MOD(extack, "Non-pfcp option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX, nla, + pfcp_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel key pfcp option type"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) + md->type = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]); + + if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]) + md->seid = nla_get_be64(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]); + + return sizeof(*md); +} + static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1454,12 +1500,13 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, switch (nla_type(nla_opt_key)) { case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: if (key->enc_opts.dst_opt_type && - key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) { + key->enc_opts.dst_opt_type != + IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1470,7 +1517,7 @@ static 
int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1489,7 +1536,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1500,7 +1547,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1519,7 +1566,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1530,7 +1577,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1550,7 +1597,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1561,7 +1608,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1575,6 +1622,36 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } break; + case TCA_FLOWER_KEY_ENC_OPTS_PFCP: + if (key->enc_opts.dst_opt_type) { + NL_SET_ERR_MSG_MOD(extack, "Duplicate type for pfcp options"); + return -EINVAL; + } + option_len = 0; + key->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT; + option_len = fl_set_pfcp_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. 
+ */ + mask->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT; + option_len = fl_set_pfcp_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG_MOD(extack, "Key and mask miss aligned"); + return -EINVAL; + } + break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; @@ -3117,6 +3194,32 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_pfcp_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct pfcp_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_PFCP); + if (!nest) + goto nla_put_failure; + + md = (struct pfcp_metadata *)&enc_opts->data[0]; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE, md->type)) + goto nla_put_failure; + + if (nla_put_be64(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID, + md->seid, 0)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fl_dump_key_ct(struct sk_buff *skb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask) @@ -3202,26 +3305,31 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, goto nla_put_failure; switch (enc_opts->dst_opt_type) { - case TUNNEL_GENEVE_OPT: + case IP_TUNNEL_GENEVE_OPT_BIT: err = fl_dump_key_geneve_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_VXLAN_OPT: + case IP_TUNNEL_VXLAN_OPT_BIT: err = fl_dump_key_vxlan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_ERSPAN_OPT: + case IP_TUNNEL_ERSPAN_OPT_BIT: err = fl_dump_key_erspan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_GTP_OPT: + case IP_TUNNEL_GTP_OPT_BIT: err = fl_dump_key_gtp_opt(skb, enc_opts); if (err) goto nla_put_failure; break; + case IP_TUNNEL_PFCP_OPT_BIT: + err = fl_dump_key_pfcp_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; default: goto nla_put_failure; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 60239378d4..74afc21052 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * before again attaching a qdisc. 
*/ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { - dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN); netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } @@ -1389,6 +1389,7 @@ err_out4: ops->destroy(sch); qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: + lockdep_unregister_key(&sch->root_lock_key); netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index edee926ccd..9602dafe32 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1512,7 +1512,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) if (!q->overflow_timeout) { int i; /* Build fresh max-heap */ - for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--) + for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2 - 1; i >= 0; i--) cake_heapify(q, i); } q->overflow_timeout = 65535; @@ -2572,6 +2572,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, { struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CAKE_MAX + 1]; + u16 rate_flags; + u8 flow_mode; int err; err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, @@ -2579,10 +2581,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; + flow_mode = q->flow_mode; if (tb[TCA_CAKE_NAT]) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - q->flow_mode &= ~CAKE_FLOW_NAT_FLAG; - q->flow_mode |= CAKE_FLOW_NAT_FLAG * + flow_mode &= ~CAKE_FLOW_NAT_FLAG; + flow_mode |= CAKE_FLOW_NAT_FLAG * !!nla_get_u32(tb[TCA_CAKE_NAT]); #else NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT], @@ -2592,29 +2595,34 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_BASE_RATE64]) - q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]); + WRITE_ONCE(q->rate_bps, + nla_get_u64(tb[TCA_CAKE_BASE_RATE64])); if (tb[TCA_CAKE_DIFFSERV_MODE]) - q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]); + WRITE_ONCE(q->tin_mode, + nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE])); + rate_flags = q->rate_flags; if (tb[TCA_CAKE_WASH]) { if (!!nla_get_u32(tb[TCA_CAKE_WASH])) - q->rate_flags |= CAKE_FLAG_WASH; + rate_flags |= CAKE_FLAG_WASH; else - q->rate_flags &= ~CAKE_FLAG_WASH; + rate_flags &= ~CAKE_FLAG_WASH; } if (tb[TCA_CAKE_FLOW_MODE]) - q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) | + flow_mode = ((flow_mode & CAKE_FLOW_NAT_FLAG) | (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) & CAKE_FLOW_MASK)); if (tb[TCA_CAKE_ATM]) - q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]); + WRITE_ONCE(q->atm_mode, + nla_get_u32(tb[TCA_CAKE_ATM])); if (tb[TCA_CAKE_OVERHEAD]) { - q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]); - q->rate_flags |= CAKE_FLAG_OVERHEAD; + WRITE_ONCE(q->rate_overhead, + nla_get_s32(tb[TCA_CAKE_OVERHEAD])); + rate_flags |= CAKE_FLAG_OVERHEAD; q->max_netlen = 0; q->max_adjlen = 0; @@ -2623,7 +2631,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_RAW]) { - q->rate_flags &= ~CAKE_FLAG_OVERHEAD; + rate_flags &= ~CAKE_FLAG_OVERHEAD; q->max_netlen = 0; q->max_adjlen = 0; @@ -2632,54 +2640,58 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_MPU]) - q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]); + WRITE_ONCE(q->rate_mpu, + nla_get_u32(tb[TCA_CAKE_MPU])); if (tb[TCA_CAKE_RTT]) { - q->interval = nla_get_u32(tb[TCA_CAKE_RTT]); + u32 interval = nla_get_u32(tb[TCA_CAKE_RTT]); - if (!q->interval) - q->interval = 1; + WRITE_ONCE(q->interval, max(interval, 1U)); } if (tb[TCA_CAKE_TARGET]) { - q->target = 
nla_get_u32(tb[TCA_CAKE_TARGET]); + u32 target = nla_get_u32(tb[TCA_CAKE_TARGET]); - if (!q->target) - q->target = 1; + WRITE_ONCE(q->target, max(target, 1U)); } if (tb[TCA_CAKE_AUTORATE]) { if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE])) - q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS; + rate_flags |= CAKE_FLAG_AUTORATE_INGRESS; else - q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS; + rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS; } if (tb[TCA_CAKE_INGRESS]) { if (!!nla_get_u32(tb[TCA_CAKE_INGRESS])) - q->rate_flags |= CAKE_FLAG_INGRESS; + rate_flags |= CAKE_FLAG_INGRESS; else - q->rate_flags &= ~CAKE_FLAG_INGRESS; + rate_flags &= ~CAKE_FLAG_INGRESS; } if (tb[TCA_CAKE_ACK_FILTER]) - q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]); + WRITE_ONCE(q->ack_filter, + nla_get_u32(tb[TCA_CAKE_ACK_FILTER])); if (tb[TCA_CAKE_MEMORY]) - q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]); + WRITE_ONCE(q->buffer_config_limit, + nla_get_u32(tb[TCA_CAKE_MEMORY])); if (tb[TCA_CAKE_SPLIT_GSO]) { if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO])) - q->rate_flags |= CAKE_FLAG_SPLIT_GSO; + rate_flags |= CAKE_FLAG_SPLIT_GSO; else - q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; + rate_flags &= ~CAKE_FLAG_SPLIT_GSO; } if (tb[TCA_CAKE_FWMARK]) { - q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); - q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; + WRITE_ONCE(q->fwmark_mask, nla_get_u32(tb[TCA_CAKE_FWMARK])); + WRITE_ONCE(q->fwmark_shft, + q->fwmark_mask ? __ffs(q->fwmark_mask) : 0); } + WRITE_ONCE(q->rate_flags, rate_flags); + WRITE_ONCE(q->flow_mode, flow_mode); if (q->tins) { sch_tree_lock(sch); cake_reconfigure(sch); @@ -2774,68 +2786,72 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) { struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *opts; + u16 rate_flags; + u8 flow_mode; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; - if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps, - TCA_CAKE_PAD)) + if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, + READ_ONCE(q->rate_bps), TCA_CAKE_PAD)) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, - q->flow_mode & CAKE_FLOW_MASK)) + flow_mode = READ_ONCE(q->flow_mode); + if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, flow_mode & CAKE_FLOW_MASK)) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval)) + if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target)) + if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit)) + if (nla_put_u32(skb, TCA_CAKE_MEMORY, + READ_ONCE(q->buffer_config_limit))) goto nla_put_failure; + rate_flags = READ_ONCE(q->rate_flags); if (nla_put_u32(skb, TCA_CAKE_AUTORATE, - !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS))) + !!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS))) goto nla_put_failure; if (nla_put_u32(skb, TCA_CAKE_INGRESS, - !!(q->rate_flags & CAKE_FLAG_INGRESS))) + !!(rate_flags & CAKE_FLAG_INGRESS))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter)) + if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter))) goto nla_put_failure; if (nla_put_u32(skb, TCA_CAKE_NAT, - !!(q->flow_mode & CAKE_FLOW_NAT_FLAG))) + !!(flow_mode & CAKE_FLOW_NAT_FLAG))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode)) + if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode))) goto nla_put_failure; if (nla_put_u32(skb, 
TCA_CAKE_WASH, - !!(q->rate_flags & CAKE_FLAG_WASH))) + !!(rate_flags & CAKE_FLAG_WASH))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead)) + if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, READ_ONCE(q->rate_overhead))) goto nla_put_failure; - if (!(q->rate_flags & CAKE_FLAG_OVERHEAD)) + if (!(rate_flags & CAKE_FLAG_OVERHEAD)) if (nla_put_u32(skb, TCA_CAKE_RAW, 0)) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode)) + if (nla_put_u32(skb, TCA_CAKE_ATM, READ_ONCE(q->atm_mode))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu)) + if (nla_put_u32(skb, TCA_CAKE_MPU, READ_ONCE(q->rate_mpu))) goto nla_put_failure; if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO, - !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) + !!(rate_flags & CAKE_FLAG_SPLIT_GSO))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) + if (nla_put_u32(skb, TCA_CAKE_FWMARK, READ_ONCE(q->fwmark_mask))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 69001eff03..939425da18 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, } /* Everything went OK, save the parameters used. */ - q->hicredit = qopt->hicredit; - q->locredit = qopt->locredit; - q->idleslope = qopt->idleslope * BYTES_PER_KBIT; - q->sendslope = qopt->sendslope * BYTES_PER_KBIT; - q->offload = qopt->offload; + WRITE_ONCE(q->hicredit, qopt->hicredit); + WRITE_ONCE(q->locredit, qopt->locredit); + WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT); + WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT); + WRITE_ONCE(q->offload, qopt->offload); return 0; } @@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) if (!nest) goto nla_put_failure; - opt.hicredit = q->hicredit; - opt.locredit = q->locredit; - opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); - opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); - opt.offload = q->offload; + opt.hicredit = READ_ONCE(q->hicredit); + opt.locredit = READ_ONCE(q->locredit); + opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT); + opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT); + opt.offload = READ_ONCE(q->offload); if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ea108030c6..9107201092 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -405,8 +405,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, } else sch_tree_lock(sch); - q->flags = ctl->flags; - q->limit = ctl->limit; + WRITE_ONCE(q->flags, ctl->flags); + WRITE_ONCE(q->limit, ctl->limit); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, @@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt, static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) { struct choke_sched_data *q = qdisc_priv(sch); + u8 Wlog = READ_ONCE(q->parms.Wlog); struct nlattr *opts = NULL; struct tc_red_qopt opt = { - .limit = q->limit, - .flags = q->flags, - .qth_min = q->parms.qth_min >> q->parms.Wlog, - .qth_max = q->parms.qth_max >> q->parms.Wlog, - .Wlog = q->parms.Wlog, - .Plog = q->parms.Plog, - .Scell_log = q->parms.Scell_log, + .limit = READ_ONCE(q->limit), + .flags = READ_ONCE(q->flags), + .qth_min = READ_ONCE(q->parms.qth_min) >> Wlog, + .qth_max = READ_ONCE(q->parms.qth_max) >> Wlog, 
+ .Wlog = Wlog, + .Plog = READ_ONCE(q->parms.Plog), + .Scell_log = READ_ONCE(q->parms.Scell_log), }; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -447,7 +448,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) || - nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P)) + nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index ecb3f164bb..3e8d4fe4d9 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_CODEL_TARGET]) { u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]); - q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->params.target, + ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_CE_THRESHOLD]) { u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); - q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->params.ce_threshold, + (val * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); - q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->params.interval, + ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]); + WRITE_ONCE(sch->limit, + nla_get_u32(tb[TCA_CODEL_LIMIT])); if (tb[TCA_CODEL_ECN]) - q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]); + WRITE_ONCE(q->params.ecn, + !!nla_get_u32(tb[TCA_CODEL_ECN])); qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { @@ -183,6 +188,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt, static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) { struct codel_sched_data *q = qdisc_priv(sch); + codel_time_t ce_threshold; struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -190,17 +196,18 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (nla_put_u32(skb, TCA_CODEL_TARGET, - codel_time_to_us(q->params.target)) || + codel_time_to_us(READ_ONCE(q->params.target))) || nla_put_u32(skb, TCA_CODEL_LIMIT, - sch->limit) || + READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_CODEL_INTERVAL, - codel_time_to_us(q->params.interval)) || + codel_time_to_us(READ_ONCE(q->params.interval))) || nla_put_u32(skb, TCA_CODEL_ECN, - q->params.ecn)) + READ_ONCE(q->params.ecn))) goto nla_put_failure; - if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD && + ce_threshold = READ_ONCE(q->params.ce_threshold); + if (ce_threshold != CODEL_DISABLED_THRESHOLD && nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, - codel_time_to_us(q->params.ce_threshold))) + codel_time_to_us(ce_threshold))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 2e4bef713b..c74d778c32 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -467,15 +467,15 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) if (!nest) goto nla_put_failure; - opt.delta = q->delta; - opt.clockid = q->clockid; - if (q->offload) + opt.delta = READ_ONCE(q->delta); + opt.clockid = READ_ONCE(q->clockid); + if (READ_ONCE(q->offload)) opt.flags |= TC_ETF_OFFLOAD_ON; - if (q->deadline_mode) + if (READ_ONCE(q->deadline_mode)) opt.flags |= TC_ETF_DEADLINE_MODE_ON; - if (q->skip_sock_check) + if (READ_ONCE(q->skip_sock_check)) opt.flags |= 
TC_ETF_SKIP_SOCK_CHECK; if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 835b4460b4..f80bc05d4c 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -646,7 +646,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); - q->nbands = nbands; + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { list_add_tail(&q->classes[i].alist, &q->active); @@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, list_del(&q->classes[i].alist); qdisc_tree_flush_backlog(q->classes[i].qdisc); } - q->nstrict = nstrict; + WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); for (i = 0; i < q->nbands; i++) - q->classes[i].quantum = quanta[i]; + WRITE_ONCE(q->classes[i].quantum, quanta[i]); for (i = oldbands; i < q->nbands; i++) { q->classes[i].qdisc = queues[i]; @@ -676,7 +676,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, for (i = q->nbands; i < oldbands; i++) { qdisc_put(q->classes[i].qdisc); q->classes[i].qdisc = NULL; - q->classes[i].quantum = 0; + WRITE_ONCE(q->classes[i].quantum, 0); q->classes[i].deficit = 0; gnet_stats_basic_sync_init(&q->classes[i].bstats); memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); @@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb) struct ets_sched *q = qdisc_priv(sch); struct nlattr *opts; struct nlattr *nest; + u8 nbands, nstrict; int band; int prio; int err; @@ -745,21 +746,22 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb) if (!opts) goto nla_err; - if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands)) + nbands = READ_ONCE(q->nbands); + if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands)) goto nla_err; - if (q->nstrict && - nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict)) + nstrict = READ_ONCE(q->nstrict); + if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict)) goto nla_err; - if (q->nbands > q->nstrict) { + if (nbands > nstrict) { nest = nla_nest_start(skb, TCA_ETS_QUANTA); if (!nest) goto nla_err; - for (band = q->nstrict; band < q->nbands; band++) { + for (band = nstrict; band < nbands; band++) { if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, - q->classes[band].quantum)) + READ_ONCE(q->classes[band].quantum))) goto nla_err; } @@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_err; for (prio = 0; prio <= TC_PRIO_MAX; prio++) { - if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio])) + if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, + READ_ONCE(q->prio2band[prio]))) goto nla_err; } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 450f5c67ac..b50b2c2cc0 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -19,7 +19,8 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= + READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -28,7 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(sch->q.qlen < sch->limit)) + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -39,7 +40,7 @@ static int 
pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(sch->q.qlen < sch->limit)) + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; @@ -105,14 +106,14 @@ static int __fifo_init(struct Qdisc *sch, struct nlattr *opt, if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); - sch->limit = limit; + WRITE_ONCE(sch->limit, limit); } else { struct tc_fifo_qopt *ctl = nla_data(opt); if (nla_len(opt) < sizeof(*ctl)) return -EINVAL; - sch->limit = ctl->limit; + WRITE_ONCE(sch->limit, ctl->limit); } if (is_bfifo) @@ -154,7 +155,7 @@ static void fifo_destroy(struct Qdisc *sch) static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { - struct tc_fifo_qopt opt = { .limit = sch->limit }; + struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) }; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index cdf23ff16f..2389747256 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -106,6 +106,8 @@ struct fq_perband_flows { int quantum; /* based on band nr : 576KB, 192KB, 64KB */ }; +#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2) + struct fq_sched_data { /* Read mostly cache line */ @@ -122,7 +124,7 @@ struct fq_sched_data { u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; - u8 prio2band[(TC_PRIO_MAX + 1) >> 2]; + u8 prio2band[FQ_PRIO2BAND_CRUMB_SIZE]; u32 timer_slack; /* hrtimer slack in ns */ /* Read/Write fields. */ @@ -159,7 +161,7 @@ struct fq_sched_data { /* return the i-th 2-bit value ("crumb") */ static u8 fq_prio2band(const u8 *prio2band, unsigned int prio) { - return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3; + return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3; } /* @@ -888,7 +890,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); q->fq_root = array; - q->fq_trees_log = log; + WRITE_ONCE(q->fq_trees_log, log); sch_tree_unlock(sch); @@ -927,11 +929,15 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { static void fq_prio2band_compress_crumb(const u8 *in, u8 *out) { const int num_elems = TC_PRIO_MAX + 1; + u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE]; int i; - memset(out, 0, num_elems / 4); + memset(tmp, 0, sizeof(tmp)); for (i = 0; i < num_elems; i++) - out[i / 4] |= in[i] << (2 * (i & 0x3)); + tmp[i / 4] |= in[i] << (2 * (i & 0x3)); + + for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++) + WRITE_ONCE(out[i], tmp[i]); } static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) @@ -958,7 +964,7 @@ static int fq_load_weights(struct fq_sched_data *q, } } for (i = 0; i < FQ_BANDS; i++) - q->band_flows[i].quantum = weights[i]; + WRITE_ONCE(q->band_flows[i].quantum, weights[i]); return 0; } @@ -1011,16 +1017,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, err = -EINVAL; } if (tb[TCA_FQ_PLIMIT]) - sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]); + WRITE_ONCE(sch->limit, + nla_get_u32(tb[TCA_FQ_PLIMIT])); if (tb[TCA_FQ_FLOW_PLIMIT]) - q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); + WRITE_ONCE(q->flow_plimit, + nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT])); if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (quantum > 0 && quantum <= (1 << 20)) { - q->quantum = quantum; + WRITE_ONCE(q->quantum, quantum); } else { NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); err = -EINVAL; @@ -1028,7 +1036,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, } if 
(tb[TCA_FQ_INITIAL_QUANTUM]) - q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + WRITE_ONCE(q->initial_quantum, + nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM])); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) pr_warn_ratelimited("sch_fq: defrate %u ignored.\n", @@ -1037,17 +1046,19 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_FLOW_MAX_RATE]) { u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); - q->flow_max_rate = (rate == ~0U) ? ~0UL : rate; + WRITE_ONCE(q->flow_max_rate, + (rate == ~0U) ? ~0UL : rate); } if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) - q->low_rate_threshold = - nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + WRITE_ONCE(q->low_rate_threshold, + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD])); if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); if (enable <= 1) - q->rate_enable = enable; + WRITE_ONCE(q->rate_enable, + enable); else err = -EINVAL; } @@ -1055,7 +1066,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_FLOW_REFILL_DELAY]) { u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ; - q->flow_refill_delay = usecs_to_jiffies(usecs_delay); + WRITE_ONCE(q->flow_refill_delay, + usecs_to_jiffies(usecs_delay)); } if (!err && tb[TCA_FQ_PRIOMAP]) @@ -1065,21 +1077,26 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack); if (tb[TCA_FQ_ORPHAN_MASK]) - q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + WRITE_ONCE(q->orphan_mask, + nla_get_u32(tb[TCA_FQ_ORPHAN_MASK])); if (tb[TCA_FQ_CE_THRESHOLD]) - q->ce_threshold = (u64)NSEC_PER_USEC * - nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]); + WRITE_ONCE(q->ce_threshold, + (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_CE_THRESHOLD])); if (tb[TCA_FQ_TIMER_SLACK]) - q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]); + WRITE_ONCE(q->timer_slack, + nla_get_u32(tb[TCA_FQ_TIMER_SLACK])); if (tb[TCA_FQ_HORIZON]) - q->horizon = (u64)NSEC_PER_USEC * - nla_get_u32(tb[TCA_FQ_HORIZON]); + WRITE_ONCE(q->horizon, + (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_HORIZON])); if (tb[TCA_FQ_HORIZON_DROP]) - q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]); + WRITE_ONCE(q->horizon_drop, + nla_get_u8(tb[TCA_FQ_HORIZON_DROP])); if (!err) { @@ -1160,13 +1177,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); - u64 ce_threshold = q->ce_threshold; struct tc_prio_qopt prio = { .bands = FQ_BANDS, }; - u64 horizon = q->horizon; struct nlattr *opts; + u64 ce_threshold; s32 weights[3]; + u64 horizon; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) @@ -1174,35 +1191,48 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ + ce_threshold = READ_ONCE(q->ce_threshold); do_div(ce_threshold, NSEC_PER_USEC); + + horizon = READ_ONCE(q->horizon); do_div(horizon, NSEC_PER_USEC); - if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || - nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || - nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || - nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || - nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || + if (nla_put_u32(skb, TCA_FQ_PLIMIT, + READ_ONCE(sch->limit)) || + nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, + READ_ONCE(q->flow_plimit)) || + nla_put_u32(skb, TCA_FQ_QUANTUM, + READ_ONCE(q->quantum)) || + nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, + READ_ONCE(q->initial_quantum)) || + 
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, + READ_ONCE(q->rate_enable)) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, - min_t(unsigned long, q->flow_max_rate, ~0U)) || + min_t(unsigned long, + READ_ONCE(q->flow_max_rate), ~0U)) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, - jiffies_to_usecs(q->flow_refill_delay)) || - nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, + READ_ONCE(q->orphan_mask)) || nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, - q->low_rate_threshold) || + READ_ONCE(q->low_rate_threshold)) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || - nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) || - nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) || + nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, + READ_ONCE(q->fq_trees_log)) || + nla_put_u32(skb, TCA_FQ_TIMER_SLACK, + READ_ONCE(q->timer_slack)) || nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) || - nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) + nla_put_u8(skb, TCA_FQ_HORIZON_DROP, + READ_ONCE(q->horizon_drop))) goto nla_put_failure; fq_prio2band_decompress_crumb(q->prio2band, prio.priomap); if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) goto nla_put_failure; - weights[0] = q->band_flows[0].quantum; - weights[1] = q->band_flows[1].quantum; - weights[2] = q->band_flows[2].quantum; + weights[0] = READ_ONCE(q->band_flows[0].quantum); + weights[1] = READ_ONCE(q->band_flows[1].quantum); + weights[2] = READ_ONCE(q->band_flows[2].quantum); if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights)) goto nla_put_failure; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 79f9d6de6c..4f908c11ba 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -396,40 +396,49 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_CODEL_TARGET]) { u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]); - q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->cparams.target, + (target * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) { u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]); - q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->cparams.ce_threshold, + (val * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]) - q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]); + WRITE_ONCE(q->cparams.ce_threshold_selector, + nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])); if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]) - q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]); + WRITE_ONCE(q->cparams.ce_threshold_mask, + nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])); if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); - q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT; + WRITE_ONCE(q->cparams.interval, + (interval * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_FQ_CODEL_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]); + WRITE_ONCE(sch->limit, + nla_get_u32(tb[TCA_FQ_CODEL_LIMIT])); if (tb[TCA_FQ_CODEL_ECN]) - q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]); + WRITE_ONCE(q->cparams.ecn, + !!nla_get_u32(tb[TCA_FQ_CODEL_ECN])); if (quantum) - q->quantum = quantum; + WRITE_ONCE(q->quantum, quantum); if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) - q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); + WRITE_ONCE(q->drop_batch_size, + max(1U, 
nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]))); if (tb[TCA_FQ_CODEL_MEMORY_LIMIT]) - q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])); + WRITE_ONCE(q->memory_limit, + min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]))); while (sch->q.qlen > sch->limit || q->memory_usage > q->memory_limit) { @@ -522,6 +531,7 @@ init_failure: static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_codel_sched_data *q = qdisc_priv(sch); + codel_time_t ce_threshold; struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET, - codel_time_to_us(q->cparams.target)) || + codel_time_to_us(READ_ONCE(q->cparams.target))) || nla_put_u32(skb, TCA_FQ_CODEL_LIMIT, - sch->limit) || + READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL, - codel_time_to_us(q->cparams.interval)) || + codel_time_to_us(READ_ONCE(q->cparams.interval))) || nla_put_u32(skb, TCA_FQ_CODEL_ECN, - q->cparams.ecn) || + READ_ONCE(q->cparams.ecn)) || nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM, - q->quantum) || + READ_ONCE(q->quantum)) || nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE, - q->drop_batch_size) || + READ_ONCE(q->drop_batch_size)) || nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT, - q->memory_limit) || + READ_ONCE(q->memory_limit)) || nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, - q->flows_cnt)) + READ_ONCE(q->flows_cnt))) goto nla_put_failure; - if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) { + ce_threshold = READ_ONCE(q->cparams.ce_threshold); + if (ce_threshold != CODEL_DISABLED_THRESHOLD) { if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, - codel_time_to_us(q->cparams.ce_threshold))) + codel_time_to_us(ce_threshold))) goto nla_put_failure; - if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector)) + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, + READ_ONCE(q->cparams.ce_threshold_selector))) goto nla_put_failure; - if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask)) + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, + READ_ONCE(q->cparams.ce_threshold_mask))) goto nla_put_failure; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 358cf304f4..c38f33ff80 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -299,8 +299,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_PIE_LIMIT]) { u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]); - q->p_params.limit = limit; - sch->limit = limit; + WRITE_ONCE(q->p_params.limit, limit); + WRITE_ONCE(sch->limit, limit); } if (tb[TCA_FQ_PIE_FLOWS]) { if (q->flows) { @@ -322,39 +322,45 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]); /* convert to pschedtime */ - q->p_params.target = - PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + WRITE_ONCE(q->p_params.target, + PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC)); } /* tupdate is in jiffies */ if (tb[TCA_FQ_PIE_TUPDATE]) - q->p_params.tupdate = - usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])); + WRITE_ONCE(q->p_params.tupdate, + usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]))); if (tb[TCA_FQ_PIE_ALPHA]) - q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]); + WRITE_ONCE(q->p_params.alpha, + nla_get_u32(tb[TCA_FQ_PIE_ALPHA])); if (tb[TCA_FQ_PIE_BETA]) - q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]); + 
WRITE_ONCE(q->p_params.beta, + nla_get_u32(tb[TCA_FQ_PIE_BETA])); if (tb[TCA_FQ_PIE_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]); + WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM])); if (tb[TCA_FQ_PIE_MEMORY_LIMIT]) - q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]); + WRITE_ONCE(q->memory_limit, + nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT])); if (tb[TCA_FQ_PIE_ECN_PROB]) - q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]); + WRITE_ONCE(q->ecn_prob, + nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB])); if (tb[TCA_FQ_PIE_ECN]) - q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]); + WRITE_ONCE(q->p_params.ecn, + nla_get_u32(tb[TCA_FQ_PIE_ECN])); if (tb[TCA_FQ_PIE_BYTEMODE]) - q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]); + WRITE_ONCE(q->p_params.bytemode, + nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE])); if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]) - q->p_params.dq_rate_estimator = - nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]); + WRITE_ONCE(q->p_params.dq_rate_estimator, + nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])); /* Drop excess packets if new limit is lower */ while (sch->q.qlen > sch->limit) { @@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb) return -EMSGSIZE; /* convert target from pschedtime to us */ - if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) || - nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) || + if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) || + nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) || nla_put_u32(skb, TCA_FQ_PIE_TARGET, - ((u32)PSCHED_TICKS2NS(q->p_params.target)) / + ((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) / NSEC_PER_USEC) || nla_put_u32(skb, TCA_FQ_PIE_TUPDATE, - jiffies_to_usecs(q->p_params.tupdate)) || - nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) || - nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) || - nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) || - nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) || - nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) || - nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) || - nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) || + jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) || + nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) || + nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) || + nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) || + nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, + READ_ONCE(q->memory_limit)) || + nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) || + nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) || + nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) || nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR, - q->p_params.dq_rate_estimator)) + READ_ONCE(q->p_params.dq_rate_estimator))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4a2c763e2d..e22ff003d5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,19 +506,22 @@ static void dev_watchdog(struct timer_list *t) unsigned int timedout_ms = 0; unsigned int i; unsigned long trans_start; + unsigned long oldest_start = jiffies; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); trans_start = READ_ONCE(txq->trans_start); - if (netif_xmit_stopped(txq) && - time_after(jiffies, (trans_start + - dev->watchdog_timeo))) { + if (!netif_xmit_stopped(txq)) + continue; + if (time_after(jiffies, 
trans_start + dev->watchdog_timeo)) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); break; } + if (time_after(oldest_start, trans_start)) + oldest_start = trans_start; } if (unlikely(timedout_ms)) { @@ -531,7 +534,7 @@ static void dev_watchdog(struct timer_list *t) netif_unfreeze_queues(dev); } if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + + round_jiffies(oldest_start + dev->watchdog_timeo))) release = false; } @@ -673,6 +676,7 @@ struct Qdisc noop_qdisc = { .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, + .owner = -1, }; EXPORT_SYMBOL(noop_qdisc); @@ -945,7 +949,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); gnet_stats_basic_sync_init(&sch->bstats); + lockdep_register_key(&sch->root_lock_key); spin_lock_init(&sch->q.lock); + lockdep_set_class(&sch->q.lock, &sch->root_lock_key); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = @@ -980,6 +986,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, return sch; errout1: + lockdep_unregister_key(&sch->root_lock_key); kfree(sch); errout: return ERR_PTR(err); @@ -1068,6 +1075,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc) if (ops->destroy) ops->destroy(qdisc); + lockdep_unregister_key(&qdisc->root_lock_key); module_put(ops->owner); netdev_put(dev, &qdisc->dev_tracker); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 4e626df742..c287bf8423 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1174,7 +1174,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } /* classification failed, try default class */ - cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); + cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), + READ_ONCE(q->defcls)), sch); if (cl == NULL || cl->level > 0) return NULL; @@ -1443,9 +1444,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; qopt = nla_data(opt); - sch_tree_lock(sch); - q->defcls = qopt->defcls; - sch_tree_unlock(sch); + WRITE_ONCE(q->defcls, qopt->defcls); return 0; } @@ -1525,7 +1524,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - qopt.defcls = q->defcls; + qopt.defcls = READ_ONCE(q->defcls); if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; return skb->len; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 3f906df143..44d9efe1a9 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -534,27 +534,31 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (tb[TCA_HHF_BACKLOG_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT])); - q->quantum = new_quantum; - q->hhf_non_hh_weight = new_hhf_non_hh_weight; + WRITE_ONCE(q->quantum, new_quantum); + WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight); if (tb[TCA_HHF_HH_FLOWS_LIMIT]) - q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]); + WRITE_ONCE(q->hh_flows_limit, + nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT])); if (tb[TCA_HHF_RESET_TIMEOUT]) { u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); - q->hhf_reset_timeout = usecs_to_jiffies(us); + WRITE_ONCE(q->hhf_reset_timeout, + usecs_to_jiffies(us)); } if (tb[TCA_HHF_ADMIT_BYTES]) - q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]); + WRITE_ONCE(q->hhf_admit_bytes, + 
nla_get_u32(tb[TCA_HHF_ADMIT_BYTES])); if (tb[TCA_HHF_EVICT_TIMEOUT]) { u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); - q->hhf_evict_timeout = usecs_to_jiffies(us); + WRITE_ONCE(q->hhf_evict_timeout, + usecs_to_jiffies(us)); } qlen = sch->q.qlen; @@ -657,15 +661,18 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) || - nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) || - nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) || + if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) || + nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) || + nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, + READ_ONCE(q->hh_flows_limit)) || nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, - jiffies_to_usecs(q->hhf_reset_timeout)) || - nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) || + jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) || + nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, + READ_ONCE(q->hhf_admit_bytes)) || nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, - jiffies_to_usecs(q->hhf_evict_timeout)) || - nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) + jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) || + nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, + READ_ONCE(q->hhf_non_hh_weight))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 93e6fb56f3..ff3de37874 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } -static void htb_set_lockdep_class_child(struct Qdisc *q) -{ - static struct lock_class_key child_key; - - lockdep_set_class(qdisc_lock(q), &child_key); -} - static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) { return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); @@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, return -ENOMEM; } - htb_set_lockdep_class_child(qdisc); q->direct_qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, } if (q->offload) { - htb_set_lockdep_class_child(new); /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ qdisc_refcount_inc(new); old_q = htb_graft_helper(dev_queue, new); @@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - if (q->offload) { - if (new_q) - htb_set_lockdep_class_child(new_q); + if (q->offload) htb_parent_to_leaf_offload(sch, dev_queue, new_q); - } } sch_tree_lock(sch); @@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); if (q->offload) { - if (new_q) { - htb_set_lockdep_class_child(new_q); - /* One ref for cl->leaf.q, the other for - * dev_queue->qdisc. - */ + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + if (new_q) qdisc_refcount_inc(new_q); - } old_q = htb_graft_helper(dev_queue, new_q); /* No qdisc_put needed. 
*/ WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index c2ef9dcf91..cc6051d4f2 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch) tcf_block_put_ext(q->block, sch, &q->block_info); if (entry) { - tcx_miniq_set_active(entry, false); + tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(entry); @@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, false, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, false); @@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); if (ingress_entry) { - tcx_miniq_set_active(ingress_entry, false); + tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(ingress_entry); @@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch) } if (egress_entry) { - tcx_miniq_set_active(egress_entry, false); + tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false); tcx_entry_free(egress_entry); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 225353fbb3..51d4013b61 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -215,10 +215,8 @@ static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt, for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) fp[tc] = priv->fp[tc]; - nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) { - if (nla_type(n) != TCA_MQPRIO_TC_ENTRY) - continue; - + nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt, + nlattr_opt_len, rem) { err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack); if (err) goto out; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 79e93a19d5..06e03f5cd7 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qopt->bands = qdisc_dev(sch)->real_num_tx_queues; - removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands), + removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands), GFP_KERNEL); if (!removed) return -ENOMEM; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 1764059b06..b3dcb845b3 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -156,36 +156,38 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, u32 target = nla_get_u32(tb[TCA_PIE_TARGET]); /* convert to pschedtime */ - q->params.target = PSCHED_NS2TICKS((u64)target * 
NSEC_PER_USEC); + WRITE_ONCE(q->params.target, + PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC)); } /* tupdate is in jiffies */ if (tb[TCA_PIE_TUPDATE]) - q->params.tupdate = - usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + WRITE_ONCE(q->params.tupdate, + usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]))); if (tb[TCA_PIE_LIMIT]) { u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); - q->params.limit = limit; - sch->limit = limit; + WRITE_ONCE(q->params.limit, limit); + WRITE_ONCE(sch->limit, limit); } if (tb[TCA_PIE_ALPHA]) - q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]); + WRITE_ONCE(q->params.alpha, nla_get_u32(tb[TCA_PIE_ALPHA])); if (tb[TCA_PIE_BETA]) - q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]); + WRITE_ONCE(q->params.beta, nla_get_u32(tb[TCA_PIE_BETA])); if (tb[TCA_PIE_ECN]) - q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]); + WRITE_ONCE(q->params.ecn, nla_get_u32(tb[TCA_PIE_ECN])); if (tb[TCA_PIE_BYTEMODE]) - q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + WRITE_ONCE(q->params.bytemode, + nla_get_u32(tb[TCA_PIE_BYTEMODE])); if (tb[TCA_PIE_DQ_RATE_ESTIMATOR]) - q->params.dq_rate_estimator = - nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]); + WRITE_ONCE(q->params.dq_rate_estimator, + nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR])); /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; @@ -469,17 +471,18 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) /* convert target from pschedtime to us */ if (nla_put_u32(skb, TCA_PIE_TARGET, - ((u32)PSCHED_TICKS2NS(q->params.target)) / + ((u32)PSCHED_TICKS2NS(READ_ONCE(q->params.target))) / NSEC_PER_USEC) || - nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_PIE_LIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_PIE_TUPDATE, - jiffies_to_usecs(q->params.tupdate)) || - nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || - nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || + jiffies_to_usecs(READ_ONCE(q->params.tupdate))) || + nla_put_u32(skb, TCA_PIE_ALPHA, READ_ONCE(q->params.alpha)) || + nla_put_u32(skb, TCA_PIE_BETA, READ_ONCE(q->params.beta)) || nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || - nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) || + nla_put_u32(skb, TCA_PIE_BYTEMODE, + READ_ONCE(q->params.bytemode)) || nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR, - q->params.dq_rate_estimator)) + READ_ONCE(q->params.dq_rate_estimator))) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e66f4afb92..3b9245a3c7 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -608,6 +608,7 @@ static void sfq_perturbation(struct timer_list *t) struct Qdisc *sch = q->sch; spinlock_t *root_lock; siphash_key_t nkey; + int period; get_random_bytes(&nkey, sizeof(nkey)); rcu_read_lock(); @@ -618,8 +619,12 @@ static void sfq_perturbation(struct timer_list *t) sfq_rehash(sch); spin_unlock(root_lock); - if (q->perturb_period) - mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + /* q->perturb_period can change under us from + * sfq_change() and sfq_destroy(). 
+ */ + period = READ_ONCE(q->perturb_period); + if (period) + mod_timer(&q->perturb_timer, jiffies + period); rcu_read_unlock(); } @@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = ctl->quantum; q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); } - q->perturb_period = ctl->perturb_period * HZ; + WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); if (ctl->flows) q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { @@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - q->perturb_period = 0; + WRITE_ONCE(q->perturb_period, 0); del_timer_sync(&q->perturb_timer); sfq_free(q->ht); sfq_free(q->slots); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index b4dd626c30..20ff7386b7 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -79,7 +79,9 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, prio = min(skb->priority, max_priority); qdisc = &q->qdiscs[prio]; - if (sch->q.qlen < sch->limit) { + + /* sch->limit can change under us from skbprio_change() */ + if (sch->q.qlen < READ_ONCE(sch->limit)) { __skb_queue_tail(qdisc, skb); qdisc_qstats_backlog_inc(sch, skb); q->qstats[prio].backlog += qdisc_pkt_len(skb); @@ -172,7 +174,7 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, if (opt->nla_len != nla_attr_size(sizeof(*ctl))) return -EINVAL; - sch->limit = ctl->limit; + WRITE_ONCE(sch->limit, ctl->limit); return 0; } @@ -200,7 +202,7 @@ static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_skbprio_qopt opt; - opt.limit = sch->limit; + opt.limit = READ_ONCE(sch->limit); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) return -1; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a0d54b4221..b284a06b5a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, list_for_each_entry(entry, &new->entries, list) cycle = ktime_add_ns(cycle, entry->interval); - if (!cycle) { - NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0"); - return -EINVAL; - } - if (cycle < 0 || cycle > INT_MAX) { NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); return -EINVAL; @@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, new->cycle_time = cycle; } + if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too small"); + return -EINVAL; + } + taprio_calculate_gate_durations(q, new); return 0; @@ -1176,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags); - if (!qopt && !dev->num_tc) { - NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); - return -EINVAL; - } - - /* If num_tc is already set, it means that the user already - * configured the mqprio part - */ - if (dev->num_tc) + if (!qopt) { + if (!dev->num_tc) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } return 0; + } /* taprio imposes that traffic classes map 1:n to tx queues */ if (qopt->num_tc > dev->num_tx_queues) { @@ -1752,10 +1749,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch, fp[tc] = q->fp[tc]; } - nla_for_each_nested(n, opt, rem) { - if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) - continue; - + nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, 
opt, rem) { err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs, extack); if (err) @@ -1851,6 +1845,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } q->flags = taprio_flags; + /* Needed for length_to_duration() during netlink attribute parsing */ + taprio_set_picos_per_byte(dev, q); + err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) return err; @@ -1910,7 +1907,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) goto free_sched; - taprio_set_picos_per_byte(dev, q); taprio_update_queue_max_sdu(q, new_admin, stab); if (FULL_OFFLOAD_IS_ENABLED(q->flags)) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 59304611dc..8badec6d82 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); - if (q->q.qlen < dev->tx_queue_len) { + if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) { __skb_queue_tail(&q->q, skb); return NET_XMIT_SUCCESS; } @@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) } while ((q = NEXT_SLAVE(q)) != m->slaves); } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } |
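
The recurring pattern in the hunks above (sch_fq_pie, sch_hhf, sch_hfsc, sch_pie, sch_skbprio, sch_teql) is the same: the qdisc ->change() path stores each tunable with WRITE_ONCE() and the ->dump() or fast path reads it back with READ_ONCE(), so the reader no longer needs the qdisc tree lock to sample a single parameter without load/store tearing. The userspace sketch below models that contract with C11 relaxed atomics; it is not kernel code, and the names (toy_params, toy_change, toy_dump) are hypothetical stand-ins for a qdisc's private state, its ->change() and its ->dump().

	/*
	 * Userspace sketch of the WRITE_ONCE()/READ_ONCE() pattern used in the
	 * hunks above.  Relaxed C11 atomics give the equivalent guarantee for
	 * this illustration: each access is a single, untorn load or store that
	 * the compiler cannot split or re-fetch.  No cross-field atomicity is
	 * implied; a concurrent reader may see a mix of old and new parameters,
	 * which is exactly what the lockless dump paths tolerate.
	 */
	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	struct toy_params {
		_Atomic uint32_t limit;		/* stands in for sch->limit   */
		_Atomic uint32_t quantum;	/* stands in for q->quantum   */
	};

	/* Writer side: analogous to a ->change() handler using WRITE_ONCE(). */
	static void toy_change(struct toy_params *p, uint32_t limit, uint32_t quantum)
	{
		atomic_store_explicit(&p->limit, limit, memory_order_relaxed);
		atomic_store_explicit(&p->quantum, quantum, memory_order_relaxed);
	}

	/* Reader side: analogous to a lockless ->dump() using READ_ONCE(). */
	static void toy_dump(const struct toy_params *p)
	{
		uint32_t limit = atomic_load_explicit(&p->limit, memory_order_relaxed);
		uint32_t quantum = atomic_load_explicit(&p->quantum, memory_order_relaxed);

		printf("limit=%u quantum=%u\n", limit, quantum);
	}

	int main(void)
	{
		struct toy_params p;

		atomic_init(&p.limit, 1000);
		atomic_init(&p.quantum, 1514);

		toy_change(&p, 2000, 3028);	/* "tc qdisc change ..." */
		toy_dump(&p);			/* "tc qdisc show ..."   */
		return 0;
	}

The design choice this models is deliberate: each tunable is an independent word, so dump and enqueue paths only need freedom from tearing, not a consistent snapshot of all parameters, which is why the conversions above can drop or avoid sch_tree_lock() around single-field reads and writes.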