summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--net/8021q/vlan_core.c2
-rw-r--r--net/8021q/vlan_dev.c32
-rw-r--r--net/8021q/vlan_netlink.c10
-rw-r--r--net/8021q/vlanproc.c46
-rw-r--r--net/9p/Kconfig1
-rw-r--r--net/9p/client.c51
-rw-r--r--net/9p/trans_fd.c1
-rw-r--r--net/9p/trans_virtio.c1
-rw-r--r--net/Kconfig7
-rw-r--r--net/Makefile2
-rw-r--r--net/appletalk/ddp.c19
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/svc.c8
-rw-r--r--net/ax25/Kconfig2
-rw-r--r--net/ax25/af_ax25.c12
-rw-r--r--net/ax25/ax25_dev.c53
-rw-r--r--net/ax25/sysctl_net_ax25.c5
-rw-r--r--net/batman-adv/distributed-arp-table.c3
-rw-r--r--net/batman-adv/main.c16
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/netlink.c2
-rw-r--r--net/batman-adv/originator.c29
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/trace.h4
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/bnep/core.c5
-rw-r--r--net/bluetooth/hci_conn.c193
-rw-r--r--net/bluetooth/hci_core.c277
-rw-r--r--net/bluetooth/hci_event.c439
-rw-r--r--net/bluetooth/hci_request.h4
-rw-r--r--net/bluetooth/hci_sock.c9
-rw-r--r--net/bluetooth/hci_sync.c320
-rw-r--r--net/bluetooth/iso.c206
-rw-r--r--net/bluetooth/l2cap_core.c174
-rw-r--r--net/bluetooth/l2cap_sock.c107
-rw-r--r--net/bluetooth/mgmt.c104
-rw-r--r--net/bluetooth/msft.c2
-rw-r--r--net/bluetooth/msft.h4
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/sco.c14
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c95
-rw-r--r--net/bpf/test_run.c26
-rw-r--r--net/bridge/br.c15
-rw-r--r--net/bridge/br_device.c37
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/bridge/br_forward.c13
-rw-r--r--net/bridge/br_mst.c29
-rw-r--r--net/bridge/br_multicast.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/bridge/br_netlink.c3
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/bridge/br_vlan_tunnel.c9
-rw-r--r--net/bridge/netfilter/Kconfig7
-rw-r--r--net/bridge/netfilter/Makefile2
-rw-r--r--net/caif/cfctrl.c8
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/bcm.c69
-rw-r--r--net/can/isotp.c5
-rw-r--r--net/can/j1939/main.c6
-rw-r--r--net/can/j1939/transport.c21
-rw-r--r--net/can/raw.c104
-rw-r--r--net/ceph/crush/mapper.c7
-rw-r--r--net/ceph/mon_client.c14
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/bpf_sk_storage.c23
-rw-r--r--net/core/datagram.c39
-rw-r--r--net/core/dev.c868
-rw-r--r--net/core/dev.h31
-rw-r--r--net/core/dev_addr_lists_test.c14
-rw-r--r--net/core/drop_monitor.c20
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/dst_cache.c13
-rw-r--r--net/core/fib_rules.c17
-rw-r--r--net/core/filter.c291
-rw-r--r--net/core/flow_dissector.c22
-rw-r--r--net/core/gro.c72
-rw-r--r--net/core/gro_cells.c3
-rw-r--r--net/core/gso.c4
-rw-r--r--net/core/gso_test.c274
-rw-r--r--net/core/hotdata.c25
-rw-r--r--net/core/ieee8021q_helpers.c242
-rw-r--r--net/core/link_watch.c17
-rw-r--r--net/core/neighbour.c79
-rw-r--r--net/core/net-procfs.c58
-rw-r--r--net/core/net-sysfs.c147
-rw-r--r--net/core/net_namespace.c60
-rw-r--r--net/core/net_test.c387
-rw-r--r--net/core/netdev-genl-gen.c13
-rw-r--r--net/core/netdev-genl-gen.h2
-rw-r--r--net/core/netdev-genl.c277
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool.c182
-rw-r--r--net/core/rtnetlink.c344
-rw-r--r--net/core/scm.c17
-rw-r--r--net/core/skbuff.c371
-rw-r--r--net/core/skmsg.c8
-rw-r--r--net/core/sock.c107
-rw-r--r--net/core/sock_diag.c120
-rw-r--r--net/core/sock_map.c285
-rw-r--r--net/core/sysctl_net_core.c47
-rw-r--r--net/core/xdp.c23
-rw-r--r--net/dccp/ackvec.c8
-rw-r--r--net/dccp/ccids/Kconfig2
-rw-r--r--net/dccp/ccids/ccid2.c1
-rw-r--r--net/dccp/diag.c1
-rw-r--r--net/dccp/ipv4.c19
-rw-r--r--net/dccp/ipv6.c23
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/sysctl.c2
-rw-r--r--net/devlink/core.c6
-rw-r--r--net/devlink/dev.c14
-rw-r--r--net/devlink/param.c7
-rw-r--r--net/devlink/port.c53
-rw-r--r--net/dsa/devlink.c3
-rw-r--r--net/dsa/dsa.c17
-rw-r--r--net/dsa/port.c175
-rw-r--r--net/dsa/tag_sja1105.c4
-rw-r--r--net/dsa/trace.h34
-rw-r--r--net/dsa/user.c135
-rw-r--r--net/ethernet/eth.c4
-rw-r--r--net/ethtool/eee.c62
-rw-r--r--net/ethtool/ioctl.c70
-rw-r--r--net/ethtool/linkstate.c41
-rw-r--r--net/ethtool/netlink.c14
-rw-r--r--net/ethtool/pse-pd.c64
-rw-r--r--net/ethtool/rss.c8
-rw-r--r--net/ethtool/tsinfo.c52
-rw-r--r--net/handshake/tlshd.c1
-rw-r--r--net/hsr/hsr_device.c102
-rw-r--r--net/hsr/hsr_device.h4
-rw-r--r--net/hsr/hsr_forward.c85
-rw-r--r--net/hsr/hsr_framereg.c52
-rw-r--r--net/hsr/hsr_framereg.h4
-rw-r--r--net/hsr/hsr_main.c2
-rw-r--r--net/hsr/hsr_main.h7
-rw-r--r--net/hsr/hsr_netlink.c30
-rw-r--r--net/hsr/hsr_slave.c1
-rw-r--r--net/ieee802154/6lowpan/core.c3
-rw-r--r--net/ieee802154/6lowpan/reassembly.c8
-rw-r--r--net/ieee802154/socket.c1
-rw-r--r--net/ieee802154/sysfs.c2
-rw-r--r--net/ieee802154/sysfs.h2
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/af_inet.c116
-rw-r--r--net/ipv4/arp.c204
-rw-r--r--net/ipv4/bpf_tcp_ca.c32
-rw-r--r--net/ipv4/cipso_ipv4.c87
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c327
-rw-r--r--net/ipv4/esp4.c18
-rw-r--r--net/ipv4/esp4_offload.c7
-rw-r--r--net/ipv4/fib_frontend.c51
-rw-r--r--net/ipv4/fib_semantics.c15
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/fou_bpf.c6
-rw-r--r--net/ipv4/fou_core.c4
-rw-r--r--net/ipv4/gre_demux.c4
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/icmp.c30
-rw-r--r--net/ipv4/igmp.c7
-rw-r--r--net/ipv4/inet_connection_sock.c31
-rw-r--r--net/ipv4/inet_diag.c101
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c16
-rw-r--r--net/ipv4/inetpeer.c5
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c170
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c12
-rw-r--r--net/ipv4/ip_sockglue.c13
-rw-r--r--net/ipv4/ip_tunnel.c156
-rw-r--r--net/ipv4/ip_tunnel_core.c82
-rw-r--r--net/ipv4/ip_vti.c49
-rw-r--r--net/ipv4/ipip.c41
-rw-r--r--net/ipv4/ipmr.c11
-rw-r--r--net/ipv4/netfilter/Kconfig45
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c18
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c2
-rw-r--r--net/ipv4/nexthop.c376
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/raw.c22
-rw-r--r--net/ipv4/raw_diag.c1
-rw-r--r--net/ipv4/route.c97
-rw-r--r--net/ipv4/syncookies.c67
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c95
-rw-r--r--net/ipv4/tcp_ao.c64
-rw-r--r--net/ipv4/tcp_bbr.c10
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c8
-rw-r--r--net/ipv4/tcp_dctcp.c21
-rw-r--r--net/ipv4/tcp_diag.c1
-rw-r--r--net/ipv4/tcp_input.c246
-rw-r--r--net/ipv4/tcp_ipv4.c93
-rw-r--r--net/ipv4/tcp_metrics.c8
-rw-r--r--net/ipv4/tcp_minisocks.c46
-rw-r--r--net/ipv4/tcp_offload.c275
-rw-r--r--net/ipv4/tcp_output.c145
-rw-r--r--net/ipv4/tcp_timer.c42
-rw-r--r--net/ipv4/udp.c76
-rw-r--r--net/ipv4/udp_diag.c2
-rw-r--r--net/ipv4/udp_offload.c56
-rw-r--r--net/ipv4/udp_tunnel_core.c5
-rw-r--r--net/ipv4/xfrm4_input.c21
-rw-r--r--net/ipv4/xfrm4_policy.c5
-rw-r--r--net/ipv6/addrconf.c793
-rw-r--r--net/ipv6/addrlabel.c18
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/anycast.c66
-rw-r--r--net/ipv6/calipso.c5
-rw-r--r--net/ipv6/esp6.c18
-rw-r--r--net/ipv6/esp6_offload.c7
-rw-r--r--net/ipv6/exthdrs.c34
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/icmp.c9
-rw-r--r--net/ipv6/ila/ila_lwt.c11
-rw-r--r--net/ipv6/inet6_hashtables.c12
-rw-r--r--net/ipv6/ioam6.c72
-rw-r--r--net/ipv6/ioam6_iptunnel.c8
-rw-r--r--net/ipv6/ip6_fib.c148
-rw-r--r--net/ipv6/ip6_gre.c124
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c35
-rw-r--r--net/ipv6/ip6_output.c31
-rw-r--r--net/ipv6/ip6_tunnel.c42
-rw-r--r--net/ipv6/ip6_vti.c26
-rw-r--r--net/ipv6/ip6mr.c11
-rw-r--r--net/ipv6/ipv6_sockglue.c8
-rw-r--r--net/ipv6/mcast.c14
-rw-r--r--net/ipv6/ndisc.c120
-rw-r--r--net/ipv6/netfilter.c1
-rw-r--r--net/ipv6/netfilter/Kconfig20
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c7
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/output_core.c4
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c24
-rw-r--r--net/ipv6/reassembly.c10
-rw-r--r--net/ipv6/route.c125
-rw-r--r--net/ipv6/rpl_iptunnel.c14
-rw-r--r--net/ipv6/seg6.c5
-rw-r--r--net/ipv6/seg6_hmac.c50
-rw-r--r--net/ipv6/seg6_iptunnel.c25
-rw-r--r--net/ipv6/seg6_local.c8
-rw-r--r--net/ipv6/sit.c64
-rw-r--r--net/ipv6/syncookies.c36
-rw-r--r--net/ipv6/sysctl_net_ipv6.c14
-rw-r--r--net/ipv6/tcp_ipv6.c106
-rw-r--r--net/ipv6/tcpv6_offload.c139
-rw-r--r--net/ipv6/udp.c61
-rw-r--r--net/ipv6/udp_offload.c24
-rw-r--r--net/ipv6/xfrm6_input.c28
-rw-r--r--net/ipv6/xfrm6_policy.c13
-rw-r--r--net/ipv6/xfrm6_tunnel.c5
-rw-r--r--net/iucv/af_iucv.c20
-rw-r--r--net/iucv/iucv.c92
-rw-r--r--net/kcm/kcmsock.c15
-rw-r--r--net/l2tp/l2tp_core.c96
-rw-r--r--net/l2tp/l2tp_eth.c5
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/llc/af_llc.c7
-rw-r--r--net/llc/sysctl_net_llc.c8
-rw-r--r--net/mac80211/Makefile2
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/cfg.c534
-rw-r--r--net/mac80211/chan.c853
-rw-r--r--net/mac80211/debug.h18
-rw-r--r--net/mac80211/debugfs.c4
-rw-r--r--net/mac80211/driver-ops.c31
-rw-r--r--net/mac80211/driver-ops.h27
-rw-r--r--net/mac80211/drop.h3
-rw-r--r--net/mac80211/he.c10
-rw-r--r--net/mac80211/ht.c8
-rw-r--r--net/mac80211/ibss.c55
-rw-r--r--net/mac80211/ieee80211_i.h232
-rw-r--r--net/mac80211/iface.c63
-rw-r--r--net/mac80211/key.c20
-rw-r--r--net/mac80211/link.c39
-rw-r--r--net/mac80211/main.c242
-rw-r--r--net/mac80211/mesh.c163
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_pathtbl.c13
-rw-r--r--net/mac80211/mesh_plink.c28
-rw-r--r--net/mac80211/mlme.c3155
-rw-r--r--net/mac80211/ocb.c5
-rw-r--r--net/mac80211/offchannel.c33
-rw-r--r--net/mac80211/parse.c971
-rw-r--r--net/mac80211/rate.c18
-rw-r--r--net/mac80211/rx.c64
-rw-r--r--net/mac80211/scan.c112
-rw-r--r--net/mac80211/spectmgmt.c341
-rw-r--r--net/mac80211/sta_info.c25
-rw-r--r--net/mac80211/sta_info.h24
-rw-r--r--net/mac80211/status.c22
-rw-r--r--net/mac80211/tdls.c73
-rw-r--r--net/mac80211/tests/elems.c5
-rw-r--r--net/mac80211/trace.h203
-rw-r--r--net/mac80211/trace_msg.h2
-rw-r--r--net/mac80211/tx.c90
-rw-r--r--net/mac80211/util.c1823
-rw-r--r--net/mac80211/vht.c6
-rw-r--r--net/mac80211/wpa.c45
-rw-r--r--net/mac802154/main.c14
-rw-r--r--net/mac802154/tx.c8
-rw-r--r--net/mctp/Kconfig1
-rw-r--r--net/mctp/af_mctp.c117
-rw-r--r--net/mctp/route.c102
-rw-r--r--net/mctp/test/route-test.c413
-rw-r--r--net/mctp/test/utils.c2
-rw-r--r--net/mpls/af_mpls.c82
-rw-r--r--net/mpls/mpls_gso.c2
-rw-r--r--net/mpls/mpls_iptunnel.c6
-rw-r--r--net/mptcp/ctrl.c71
-rw-r--r--net/mptcp/diag.c1
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h4
-rw-r--r--net/mptcp/mptcp_diag.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.c7
-rw-r--r--net/mptcp/mptcp_pm_gen.h2
-rw-r--r--net/mptcp/options.c26
-rw-r--r--net/mptcp/pm.c41
-rw-r--r--net/mptcp/pm_netlink.c230
-rw-r--r--net/mptcp/pm_userspace.c240
-rw-r--r--net/mptcp/protocol.c168
-rw-r--r--net/mptcp/protocol.h146
-rw-r--r--net/mptcp/sched.c22
-rw-r--r--net/mptcp/sockopt.c157
-rw-r--r--net/mptcp/subflow.c129
-rw-r--r--net/mptcp/token_test.c7
-rw-r--r--net/ncsi/internal.h2
-rw-r--r--net/ncsi/ncsi-manage.c73
-rw-r--r--net/ncsi/ncsi-rsp.c4
-rw-r--r--net/netfilter/Kconfig12
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/core.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c92
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c33
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c46
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c36
-rw-r--r--net/netfilter/nf_bpf_link.c2
-rw-r--r--net/netfilter/nf_conncount.c8
-rw-r--r--net/netfilter/nf_conntrack_bpf.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c23
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_hooks_lwtunnel.c70
-rw-r--r--net/netfilter/nf_internals.h6
-rw-r--r--net/netfilter/nf_log.c14
-rw-r--r--net/netfilter/nf_nat_bpf.c4
-rw-r--r--net/netfilter/nf_queue.c106
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c223
-rw-r--r--net/netfilter/nf_tables_core.c6
-rw-r--r--net/netfilter/nfnetlink.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c144
-rw-r--r--net/netfilter/nft_chain_filter.c6
-rw-r--r--net/netfilter/nft_connlimit.c4
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_ct.c4
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_fib.c8
-rw-r--r--net/netfilter/nft_last.c4
-rw-r--r--net/netfilter/nft_limit.c14
-rw-r--r--net/netfilter/nft_lookup.c5
-rw-r--r--net/netfilter/nft_meta.c3
-rw-r--r--net/netfilter/nft_osf.c11
-rw-r--r--net/netfilter/nft_payload.c99
-rw-r--r--net/netfilter/nft_quota.c4
-rw-r--r--net/netfilter/nft_rt.c4
-rw-r--r--net/netfilter/nft_set_pipapo.c429
-rw-r--r--net/netfilter/nft_set_pipapo.h54
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c22
-rw-r--r--net/netfilter/nft_tunnel.c44
-rw-r--r--net/netfilter/utils.c37
-rw-r--r--net/netfilter/x_tables.c3
-rw-r--r--net/netlabel/netlabel_kapi.c41
-rw-r--r--net/netlink/af_netlink.c213
-rw-r--r--net/netlink/af_netlink.h5
-rw-r--r--net/netlink/diag.c3
-rw-r--r--net/netlink/genetlink.c44
-rw-r--r--net/netlink/genetlink.h11
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_route.c19
-rw-r--r--net/netrom/nr_timer.c3
-rw-r--r--net/netrom/sysctl_net_netrom.c1
-rw-r--r--net/nfc/core.c2
-rw-r--r--net/nfc/hci/llc.c20
-rw-r--r--net/nfc/llcp_sock.c16
-rw-r--r--net/nfc/nci/core.c17
-rw-r--r--net/nfc/netlink.c6
-rw-r--r--net/nsh/nsh.c14
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/conntrack.c7
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/flow.c3
-rw-r--r--net/openvswitch/flow_netlink.c61
-rw-r--r--net/openvswitch/meter.h1
-rw-r--r--net/openvswitch/openvswitch_trace.h8
-rw-r--r--net/openvswitch/vport-netdev.c7
-rw-r--r--net/packet/af_packet.c122
-rw-r--r--net/packet/diag.c3
-rw-r--r--net/phonet/pep.c12
-rw-r--r--net/phonet/pn_netlink.c19
-rw-r--r--net/phonet/socket.c7
-rw-r--r--net/phonet/sysctl.c1
-rw-r--r--net/psample/psample.c26
-rw-r--r--net/qrtr/mhi.c46
-rw-r--r--net/qrtr/ns.c27
-rw-r--r--net/rds/connection.c4
-rw-r--r--net/rds/ib_sysctl.c1
-rw-r--r--net/rds/sysctl.c1
-rw-r--r--net/rds/tcp.c1
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/rfkill/rfkill-gpio.c6
-rw-r--r--net/rose/af_rose.c6
-rw-r--r--net/rose/sysctl_net_rose.c1
-rw-r--r--net/rxrpc/af_rxrpc.c14
-rw-r--r--net/rxrpc/ar-internal.h90
-rw-r--r--net/rxrpc/call_event.c327
-rw-r--r--net/rxrpc/call_object.c63
-rw-r--r--net/rxrpc/conn_client.c4
-rw-r--r--net/rxrpc/conn_event.c16
-rw-r--r--net/rxrpc/conn_object.c13
-rw-r--r--net/rxrpc/input.c165
-rw-r--r--net/rxrpc/insecure.c11
-rw-r--r--net/rxrpc/io_thread.c11
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/misc.c8
-rw-r--r--net/rxrpc/output.c441
-rw-r--r--net/rxrpc/proc.c10
-rw-r--r--net/rxrpc/protocol.h6
-rw-r--r--net/rxrpc/rtt.c36
-rw-r--r--net/rxrpc/rxkad.c57
-rw-r--r--net/rxrpc/sendmsg.c63
-rw-r--r--net/rxrpc/sysctl.c17
-rw-r--r--net/rxrpc/txbuf.c174
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/act_api.c5
-rw-r--r--net/sched/act_bpf.c1
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/act_ct.c27
-rw-r--r--net/sched/act_ctinfo.c1
-rw-r--r--net/sched/act_gact.c1
-rw-r--r--net/sched/act_gate.c1
-rw-r--r--net/sched/act_ife.c1
-rw-r--r--net/sched/act_mirred.c1
-rw-r--r--net/sched/act_mpls.c1
-rw-r--r--net/sched/act_nat.c1
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/act_sample.c1
-rw-r--r--net/sched/act_simple.c1
-rw-r--r--net/sched/act_skbedit.c1
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sched/act_tunnel_key.c37
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/cls_api.c43
-rw-r--r--net/sched/cls_basic.c1
-rw-r--r--net/sched/cls_bpf.c1
-rw-r--r--net/sched/cls_cgroup.c1
-rw-r--r--net/sched/cls_flow.c1
-rw-r--r--net/sched/cls_flower.c135
-rw-r--r--net/sched/cls_fw.c1
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/cls_route.c1
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_cake.c113
-rw-r--r--net/sched/sch_cbs.c21
-rw-r--r--net/sched/sch_choke.c22
-rw-r--r--net/sched/sch_codel.c62
-rw-r--r--net/sched/sch_drr.c1
-rw-r--r--net/sched/sch_etf.c11
-rw-r--r--net/sched/sch_ets.c26
-rw-r--r--net/sched/sch_fifo.c13
-rw-r--r--net/sched/sch_fq.c109
-rw-r--r--net/sched/sch_fq_codel.c58
-rw-r--r--net/sched/sch_fq_pie.c63
-rw-r--r--net/sched/sch_generic.c19
-rw-r--r--net/sched/sch_gred.c1
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sched/sch_hhf.c36
-rw-r--r--net/sched/sch_htb.c23
-rw-r--r--net/sched/sch_ingress.c15
-rw-r--r--net/sched/sch_mqprio.c7
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c1
-rw-r--r--net/sched/sch_pie.c40
-rw-r--r--net/sched/sch_plug.c1
-rw-r--r--net/sched/sch_prio.c1
-rw-r--r--net/sched/sch_qfq.c1
-rw-r--r--net/sched/sch_red.c1
-rw-r--r--net/sched/sch_sfb.c1
-rw-r--r--net/sched/sch_sfq.c14
-rw-r--r--net/sched/sch_skbprio.c9
-rw-r--r--net/sched/sch_taprio.c107
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sched/sch_teql.c4
-rw-r--r--net/sctp/diag.c1
-rw-r--r--net/sctp/input.c19
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c14
-rw-r--r--net/sctp/sm_statefuns.c1
-rw-r--r--net/sctp/socket.c18
-rw-r--r--net/sctp/sysctl.c12
-rw-r--r--net/smc/Kconfig13
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c84
-rw-r--r--net/smc/smc.h4
-rw-r--r--net/smc/smc_cdc.c36
-rw-r--r--net/smc/smc_clc.c12
-rw-r--r--net/smc/smc_clc.h28
-rw-r--r--net/smc/smc_core.c70
-rw-r--r--net/smc/smc_core.h1
-rw-r--r--net/smc/smc_diag.c1
-rw-r--r--net/smc/smc_ib.c19
-rw-r--r--net/smc/smc_ism.c88
-rw-r--r--net/smc/smc_ism.h20
-rw-r--r--net/smc/smc_loopback.c427
-rw-r--r--net/smc/smc_loopback.h61
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/smc/smc_stats.h2
-rw-r--r--net/smc/smc_sysctl.c8
-rw-r--r--net/smc/smc_tracepoint.h4
-rw-r--r--net/socket.c33
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c4
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c12
-rw-r--r--net/sunrpc/clnt.c22
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c47
-rw-r--r--net/sunrpc/svc_xprt.c168
-rw-r--r--net/sunrpc/sysctl.c1
-rw-r--r--net/sunrpc/xprt.c9
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c181
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c148
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c15
-rw-r--r--net/sunrpc/xprtrdma/transport.c1
-rw-r--r--net/sunrpc/xprtrdma/verbs.c22
-rw-r--r--net/sunrpc/xprtsock.c41
-rw-r--r--net/switchdev/switchdev.c99
-rw-r--r--net/sysctl_net.c3
-rw-r--r--net/tipc/Kconfig7
-rw-r--r--net/tipc/Makefile4
-rw-r--r--net/tipc/bearer.c15
-rw-r--r--net/tipc/diag.c1
-rw-r--r--net/tipc/msg.c8
-rw-r--r--net/tipc/node.c3
-rw-r--r--net/tipc/socket.c19
-rw-r--r--net/tipc/sysctl.c1
-rw-r--r--net/tipc/trace.h16
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_device.c1
-rw-r--r--net/tls/tls_device_fallback.c1
-rw-r--r--net/tls/tls_main.c10
-rw-r--r--net/tls/tls_strp.c1
-rw-r--r--net/tls/tls_sw.c1
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile2
-rw-r--r--net/unix/af_unix.c399
-rw-r--r--net/unix/diag.c13
-rw-r--r--net/unix/garbage.c691
-rw-r--r--net/unix/scm.c161
-rw-r--r--net/unix/scm.h10
-rw-r--r--net/unix/sysctl_net_unix.c3
-rw-r--r--net/unix/unix_bpf.c3
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/vmw_vsock/diag.c1
-rw-r--r--net/vmw_vsock/virtio_transport.c1
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/chan.c377
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h52
-rw-r--r--net/wireless/mlme.c146
-rw-r--r--net/wireless/nl80211.c464
-rw-r--r--net/wireless/pmsr.c8
-rw-r--r--net/wireless/rdev-ops.h6
-rw-r--r--net/wireless/reg.c35
-rw-r--r--net/wireless/reg.h13
-rw-r--r--net/wireless/scan.c776
-rw-r--r--net/wireless/sme.c5
-rw-r--r--net/wireless/sysfs.c4
-rw-r--r--net/wireless/tests/Makefile2
-rw-r--r--net/wireless/tests/chan.c228
-rw-r--r--net/wireless/tests/fragmentation.c30
-rw-r--r--net/wireless/tests/scan.c247
-rw-r--r--net/wireless/trace.h76
-rw-r--r--net/wireless/util.c91
-rw-r--r--net/x25/Kconfig2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/sysctl_net_x25.c1
-rw-r--r--net/xdp/xdp_umem.c9
-rw-r--r--net/xdp/xsk_buff_pool.c29
-rw-r--r--net/xdp/xsk_diag.c1
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_compat.c7
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_input.c20
-rw-r--r--net/xfrm/xfrm_interface_bpf.c4
-rw-r--r--net/xfrm/xfrm_interface_core.c28
-rw-r--r--net/xfrm/xfrm_policy.c174
-rw-r--r--net/xfrm/xfrm_proc.c3
-rw-r--r--net/xfrm/xfrm_replay.c3
-rw-r--r--net/xfrm/xfrm_state.c90
-rw-r--r--net/xfrm/xfrm_state_bpf.c4
-rw-r--r--net/xfrm/xfrm_sysctl.c7
-rw-r--r--net/xfrm/xfrm_user.c165
635 files changed, 23159 insertions, 14174 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f001582345..9404dd551d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
if (unlikely(!vhdr))
goto out;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen;
+
type = vhdr->h_vlan_encapsulated_proto;
ptype = gro_find_receive_by_type(type);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 407b2335f0..3efba4f857 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -149,7 +149,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
if (max_mtu < new_mtu)
return -ERANGE;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -504,28 +504,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock,
- &vlan_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
-}
-
static __be16 vlan_parse_protocol(const struct sk_buff *skb)
{
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -559,7 +537,7 @@ static const struct header_ops vlan_passthru_header_ops = {
.parse_protocol = vlan_parse_protocol,
};
-static struct device_type vlan_type = {
+static const struct device_type vlan_type = {
.name = "vlan",
};
@@ -627,7 +605,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev);
+ netdev_lockdep_set_classes(dev);
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
@@ -784,9 +762,9 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
static int vlan_dev_get_iflink(const struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+ const struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- return real_dev->ifindex;
+ return READ_ONCE(real_dev->ifindex);
}
static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a3b68243fd..cf5219df79 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -117,17 +117,15 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
- nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
- if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
- continue;
+ nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING,
+ data[IFLA_VLAN_INGRESS_QOS], rem) {
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
- nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
- if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
- continue;
+ nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING,
+ data[IFLA_VLAN_EGRESS_QOS], rem) {
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 7825c12974..87b959da00 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -163,48 +163,34 @@ void vlan_proc_rem_dev(struct net_device *vlandev)
* The following few functions build the content of /proc/net/vlan/config
*/
-/* start read of /proc/net/vlan/config */
-static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu)
+static void *vlan_seq_from_index(struct seq_file *seq, loff_t *pos)
{
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct net *net = seq_file_net(seq);
- loff_t i = 1;
-
- rcu_read_lock();
- if (*pos == 0)
- return SEQ_START_TOKEN;
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
if (!is_vlan_dev(dev))
continue;
-
- if (i++ == *pos)
- return dev;
+ *pos = dev->ifindex;
+ return dev;
}
+ return NULL;
+}
+
+static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev;
- struct net *net = seq_file_net(seq);
-
++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&net->dev_base_head);
-
- for_each_netdev_continue_rcu(net, dev) {
- if (!is_vlan_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void vlan_seq_stop(struct seq_file *seq, void *v)
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 00ebce9e5a..bcdab9c23b 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -5,6 +5,7 @@
menuconfig NET_9P
tristate "Plan 9 Resource Sharing Support (9P2000)"
+ select NETFS_SUPPORT
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/net/9p/client.c b/net/9p/client.c
index f7e90b4769..5cd94721d9 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -18,6 +18,7 @@
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -235,6 +236,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
+ fc->id = 0;
+ fc->tag = P9_NOTAG;
return 0;
}
@@ -1661,6 +1664,54 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
}
EXPORT_SYMBOL(p9_client_write);
+void
+p9_client_write_subreq(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct p9_fid *fid = wreq->netfs_priv;
+ struct p9_client *clnt = fid->clnt;
+ struct p9_req_t *req;
+ unsigned long long start = subreq->start + subreq->transferred;
+ int written, len = subreq->len - subreq->transferred;
+ int err;
+
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n",
+ fid->fid, start, len);
+
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && len > 1024) {
+ req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter,
+ 0, wreq->len, P9_ZC_HDR_SZ, "dqd",
+ fid->fid, start, len);
+ } else {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+ start, len, &subreq->io_iter);
+ }
+ if (IS_ERR(req)) {
+ netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false);
+ return;
+ }
+
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
+ if (err) {
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, err, false);
+ return;
+ }
+
+ if (written > len) {
+ pr_err("bogus RWRITE count (%d > %u)\n", written, len);
+ written = len;
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len);
+
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, written, false);
+}
+EXPORT_SYMBOL(p9_client_write_subreq);
+
struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1a3948b8c4..196060dc61 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,6 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @rreq: read request
* @wreq: write request
- * @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
* @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e305071eb7..0b8086f58a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -781,7 +781,6 @@ static struct virtio_driver p9_virtio_drv = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = p9_virtio_probe,
.remove = p9_virtio_remove,
diff --git a/net/Kconfig b/net/Kconfig
index 4adc47d0c9..f0a8692496 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -60,6 +60,9 @@ config NET_XGRESS
config NET_REDIRECT
bool
+config SKB_DECRYPTED
+ bool
+
config SKB_EXTENSIONS
bool
@@ -331,6 +334,7 @@ config NET_RX_BUSY_POLL
config BQL
bool
+ prompt "Enable Byte Queue Limits"
depends on SYSFS
select DQL
default y
@@ -448,6 +452,9 @@ config GRO_CELLS
config SOCK_VALIDATE_XMIT
bool
+config NET_IEEE8021Q_HELPERS
+ bool
+
config NET_SELFTESTS
def_tristate PHYLIB
depends on PHYLIB && INET
diff --git a/net/Makefile b/net/Makefile
index b06b5539e7..65bb8c72a3 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX_SCM) += unix/
+obj-$(CONFIG_UNIX) += unix/
obj-y += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 198f5ba2fe..b068651984 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -88,6 +88,7 @@ static inline void atalk_remove_socket(struct sock *sk)
static struct sock *atalk_search_socket(struct sockaddr_at *to,
struct atalk_iface *atif)
{
+ struct sock *def_socket = NULL;
struct sock *s;
read_lock_bh(&atalk_sockets_lock);
@@ -98,8 +99,20 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
continue;
if (to->sat_addr.s_net == ATADDR_ANYNET &&
- to->sat_addr.s_node == ATADDR_BCAST)
- goto found;
+ to->sat_addr.s_node == ATADDR_BCAST) {
+ if (atif->address.s_node == at->src_node &&
+ atif->address.s_net == at->src_net) {
+ /* This socket's address matches the address of the interface
+ * that received the packet -- use it
+ */
+ goto found;
+ }
+
+ /* Continue searching for a socket matching the interface address,
+ * but use this socket by default if no other one is found
+ */
+ def_socket = s;
+ }
if (to->sat_addr.s_net == at->src_net &&
(to->sat_addr.s_node == at->src_node ||
@@ -116,7 +129,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
goto found;
}
}
- s = NULL;
+ s = def_socket;
found:
read_unlock_bh(&atalk_sockets_lock);
return s;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index d945b7c017..7aebfe9032 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -40,7 +40,6 @@ static struct ctl_table atalk_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
static struct ctl_table_header *atalk_table_header;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 294cb9efe3..42b910cb4e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
if (rt->rt_gw_family == AF_INET)
daddr = &rt->rt_gw4;
else
@@ -463,7 +463,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
unlink_clip_vcc(clip_vcc);
return 0;
}
- rt = ip_route_output(&init_net, ip, 0, 1, 0);
+ rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
diff --git a/net/atm/common.c b/net/atm/common.c
index 2a1ec014e9..9b75699992 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -116,7 +116,7 @@ static void vcc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 36a814f1fb..f8137ae693 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -324,8 +324,8 @@ out:
return error;
}
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int svc_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
@@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
lock_sock(sk);
- error = svc_create(sock_net(sk), newsock, 0, kern);
+ error = svc_create(sock_net(sk), newsock, 0, arg->kern);
if (error)
goto out;
@@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
error = -sk->sk_err;
break;
}
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
error = -EAGAIN;
break;
}
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index fdb666607f..e23a3dc14b 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -4,7 +4,7 @@
#
menuconfig HAMRADIO
- depends on NET && !S390
+ depends on NET
bool "Amateur Radio support"
help
If you want to connect your Linux box to an amateur radio, answer Y
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9169efb2f4..d6f9fae06a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1373,13 +1373,15 @@ out_release:
return err;
}
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int ax25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
+ ax25_dev *ax25_dev;
DEFINE_WAIT(wait);
struct sock *sk;
+ ax25_cb *ax25;
int err = 0;
if (sock->state != SS_UNCONNECTED)
@@ -1409,7 +1411,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
@@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
kfree_skb(skb);
sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
+ ax25 = sk_to_ax25(newsk);
+ ax25_dev = ax25->ax25_dev;
+ netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
+ ax25_dev_hold(ax25_dev);
out:
release_sock(sk);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 282ec581c0..9efd6690b3 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -22,11 +22,12 @@
#include <net/sock.h>
#include <linux/uaccess.h>
#include <linux/fcntl.h>
+#include <linux/list.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-ax25_dev *ax25_dev_list;
+static LIST_HEAD(ax25_dev_list);
DEFINE_SPINLOCK(ax25_dev_lock);
ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
@@ -34,10 +35,11 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
ax25_dev *ax25_dev, *res = NULL;
spin_lock_bh(&ax25_dev_lock);
- for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+ list_for_each_entry(ax25_dev, &ax25_dev_list, list)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
ax25_dev_hold(ax25_dev);
+ break;
}
spin_unlock_bh(&ax25_dev_lock);
@@ -59,7 +61,6 @@ void ax25_dev_device_up(struct net_device *dev)
}
refcount_set(&ax25_dev->refcount, 1);
- dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
ax25_dev->forward = NULL;
@@ -78,17 +79,19 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2;
ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN;
ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
+#endif
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_ds_setup_timer(ax25_dev);
#endif
spin_lock_bh(&ax25_dev_lock);
- ax25_dev->next = ax25_dev_list;
- ax25_dev_list = ax25_dev;
+ list_add(&ax25_dev->list, &ax25_dev_list);
+ dev->ax25_ptr = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -111,32 +114,19 @@ void ax25_dev_device_down(struct net_device *dev)
/*
* Remove any packet forwarding that points to this device.
*/
- for (s = ax25_dev_list; s != NULL; s = s->next)
+ list_for_each_entry(s, &ax25_dev_list, list)
if (s->forward == dev)
s->forward = NULL;
- if ((s = ax25_dev_list) == ax25_dev) {
- ax25_dev_list = s->next;
- goto unlock_put;
- }
-
- while (s != NULL && s->next != NULL) {
- if (s->next == ax25_dev) {
- s->next = ax25_dev->next;
- goto unlock_put;
+ list_for_each_entry(s, &ax25_dev_list, list) {
+ if (s == ax25_dev) {
+ list_del(&s->list);
+ break;
}
-
- s = s->next;
}
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- ax25_dev_put(ax25_dev);
- return;
-unlock_put:
- spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
+ spin_unlock_bh(&ax25_dev_lock);
netdev_put(dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
}
@@ -200,16 +190,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev)
*/
void __exit ax25_dev_free(void)
{
- ax25_dev *s, *ax25_dev;
+ ax25_dev *s, *n;
spin_lock_bh(&ax25_dev_lock);
- ax25_dev = ax25_dev_list;
- while (ax25_dev != NULL) {
- s = ax25_dev;
- netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker);
- ax25_dev = ax25_dev->next;
- kfree(s);
+ list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
+ netdev_put(s->dev, &s->dev_tracker);
+ list_del(&s->list);
+ ax25_dev_put(s);
}
- ax25_dev_list = NULL;
spin_unlock_bh(&ax25_dev_lock);
}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index db66e11e7f..68753aa303 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -141,8 +141,6 @@ static const struct ctl_table ax25_param_table[] = {
.extra2 = &max_ds_timeout
},
#endif
-
- { } /* that's all, folks! */
};
int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
@@ -155,6 +153,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
if (!table)
return -ENOMEM;
+ BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES);
for (k = 0; k < AX25_MAX_VALUES; k++)
table[k].data = &ax25_dev->values[k];
@@ -171,7 +170,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
void ax25_unregister_dev_sysctl(ax25_dev *ax25_dev)
{
struct ctl_table_header *header = ax25_dev->sysheader;
- struct ctl_table *table;
+ const struct ctl_table *table;
if (header) {
ax25_dev->sysheader = NULL;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 28a939d560..4c7e855343 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -684,7 +684,7 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
- goto out;
+ return ret;
batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip);
@@ -728,7 +728,6 @@ free_orig:
batadv_orig_node_put(cand[i].orig_node);
}
-out:
kfree(cand);
return ret;
}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5fc754b0b3..8e0f44c716 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -14,7 +14,6 @@
#include <linux/crc32c.h>
#include <linux/device.h>
#include <linux/errno.h>
-#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
@@ -38,6 +37,7 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/dsfield.h>
+#include <net/genetlink.h>
#include <net/rtnetlink.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
@@ -691,29 +691,31 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
"%s%s", BATADV_UEV_TYPE_VAR,
batadv_uev_type_str[type]);
if (!uevent_env[0])
- goto out;
+ goto report_error;
uevent_env[1] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_ACTION_VAR,
batadv_uev_action_str[action]);
if (!uevent_env[1])
- goto out;
+ goto free_first_env;
/* If the event is DEL, ignore the data field */
if (action != BATADV_UEV_DEL) {
uevent_env[2] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_DATA_VAR, data);
if (!uevent_env[2])
- goto out;
+ goto free_second_env;
}
ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
-out:
- kfree(uevent_env[0]);
- kfree(uevent_env[1]);
kfree(uevent_env[2]);
+free_second_env:
+ kfree(uevent_env[1]);
+free_first_env:
+ kfree(uevent_env[0]);
if (ret)
+report_error:
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
batadv_uev_type_str[type],
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 870dcd7f17..3d4c36ae2e 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.0"
+#define BATADV_SOURCE_VERSION "2024.2"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 1f7ed9d4f6..9362cd9d6f 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -15,8 +15,6 @@
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/errno.h>
-#include <linux/export.h>
-#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 71c143d4b6..8f6dd2c6ee 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
+#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -132,6 +133,29 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_vlan_id_valid() - check if vlan id is in valid batman-adv encoding
+ * @vid: the VLAN identifier
+ *
+ * Return: true when either no vlan is set or if VLAN is in correct range,
+ * false otherwise
+ */
+static bool batadv_vlan_id_valid(unsigned short vid)
+{
+ unsigned short non_vlan = vid & ~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK);
+
+ if (vid == 0)
+ return true;
+
+ if (!(vid & BATADV_VLAN_HAS_TAG))
+ return false;
+
+ if (non_vlan)
+ return false;
+
+ return true;
+}
+
+/**
* batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan
* object
* @orig_node: the originator serving the VLAN
@@ -149,6 +173,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
{
struct batadv_orig_node_vlan *vlan;
+ if (!batadv_vlan_id_valid(vid))
+ return NULL;
+
spin_lock_bh(&orig_node->vlan_list_lock);
/* first look if an object for this vid already exists */
@@ -1266,6 +1293,8 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
/* for all origins... */
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
+ if (hlist_empty(head))
+ continue;
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 89c51b3cf4..30ecbc2ef1 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -159,7 +159,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
bat_priv->mtu_set_by_user = new_mtu;
return 0;
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 5dd52bc5ca..6b816cf1a9 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -40,8 +40,8 @@ TRACE_EVENT(batadv_dbg,
),
TP_fast_assign(
- __assign_str(device, bat_priv->soft_iface->name);
- __assign_str(driver, KBUILD_MODNAME);
+ __assign_str(device);
+ __assign_str(driver);
__assign_vstr(msg, vaf->fmt, vaf->va);
),
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 715cbafbf6..50cfec8cca 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
struct in6_addr *daddr,
struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int count = atomic_read(&dev->peer_count);
const struct in6_addr *nexthop;
struct lowpan_peer *peer;
@@ -572,7 +572,7 @@ static void netdev_setup(struct net_device *dev)
dev->needs_free_netdev = true;
}
-static struct device_type bt_type = {
+static const struct device_type bt_type = {
.name = "bluetooth",
};
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5a6a49885a..ec45f77fce 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -385,7 +385,8 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_COMPRESSED_DST_ONLY:
__skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN);
- __skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2);
+ __skb_put_data(nskb, s->eh.h_source, ETH_ALEN);
+ put_unaligned(s->eh.h_proto, (__be16 *)__skb_put(nskb, 2));
break;
case BNEP_GENERAL:
@@ -549,7 +550,7 @@ static struct device *bnep_get_device(struct bnep_session *session)
return &conn->hcon->dev;
}
-static struct device_type bnep_type = {
+static const struct device_type bnep_type = {
.name = "bluetooth",
};
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 18f97b2288..080053a85b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -241,13 +241,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
__u8 vnd_len, *vnd_data = NULL;
struct hci_op_configure_data_path *cmd = NULL;
+ /* Do not take below 2 checks as error since the 1st means user do not
+ * want to use HFP offload mode and the 2nd means the vendor controller
+ * do not need to send below HCI command for offload mode.
+ */
if (!codec->data_path || !hdev->get_codec_config_data)
return 0;
- /* Do not take me as error */
- if (!hdev->get_codec_config_data)
- return 0;
-
err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
&vnd_data);
if (err < 0)
@@ -664,11 +664,6 @@ static void le_conn_timeout(struct work_struct *work)
hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
-struct iso_cig_params {
- struct hci_cp_le_set_cig_params cp;
- struct hci_cis_params cis[0x1f];
-};
-
struct iso_list_data {
union {
u8 cig;
@@ -904,16 +899,42 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
U16_MAX, GFP_ATOMIC);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role, u16 handle)
+static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
{
struct hci_conn *conn;
+ switch (type) {
+ case ACL_LINK:
+ if (!hdev->acl_mtu)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case ISO_LINK:
+ if (hdev->iso_mtu)
+ /* Dedicated ISO Buffer exists */
+ break;
+ fallthrough;
+ case LE_LINK:
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ if (!hdev->le_mtu && hdev->acl_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case SCO_LINK:
+ case ESCO_LINK:
+ if (!hdev->sco_pkts)
+ /* Controller does not support SCO or eSCO over HCI */
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ default:
+ return ERR_PTR(-ECONNREFUSED);
+ }
+
bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle);
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
- return NULL;
+ return ERR_PTR(-ENOMEM);
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
@@ -944,10 +965,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
switch (type) {
case ACL_LINK:
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
+ conn->mtu = hdev->acl_mtu;
break;
case LE_LINK:
/* conn->src should reflect the local identity address */
hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+ conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case ISO_LINK:
/* conn->src should reflect the local identity address */
@@ -959,6 +982,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
else if (conn->role == HCI_ROLE_MASTER)
conn->cleanup = cis_cleanup;
+ conn->mtu = hdev->iso_mtu ? hdev->iso_mtu :
+ hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case SCO_LINK:
if (lmp_esco_capable(hdev))
@@ -966,9 +991,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
(hdev->esco_type & EDR_ESCO_MASK);
else
conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
+
+ conn->mtu = hdev->sco_mtu;
break;
case ESCO_LINK:
conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
+ conn->mtu = hdev->sco_mtu;
break;
}
@@ -1011,9 +1039,18 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
handle = hci_conn_hash_alloc_unset(hdev);
if (unlikely(handle < 0))
- return NULL;
+ return ERR_PTR(-ECONNREFUSED);
- return hci_conn_add(hdev, type, dst, role, handle);
+ return __hci_conn_add(hdev, type, dst, role, handle);
+}
+
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
+{
+ if (handle > HCI_CONN_HANDLE_MAX)
+ return ERR_PTR(-EINVAL);
+
+ return __hci_conn_add(hdev, type, dst, role, handle);
}
static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
@@ -1140,8 +1177,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
list_for_each_entry(d, &hci_dev_list, list) {
if (!test_bit(HCI_UP, &d->flags) ||
- hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
- d->dev_type != HCI_PRIMARY)
+ hci_dev_test_flag(d, HCI_USER_CHANNEL))
continue;
/* Simple routing:
@@ -1317,8 +1353,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
bacpy(&conn->dst, dst);
} else {
conn = hci_conn_add_unset(hdev, LE_LINK, dst, role);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
hci_conn_hold(conn);
conn->pending_sec_level = sec_level;
}
@@ -1494,8 +1530,8 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
conn->state = BT_CONNECT;
@@ -1538,8 +1574,8 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
BT_DBG("requesting refresh of dst_addr");
conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) {
hci_conn_del(conn);
@@ -1586,8 +1622,8 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
- if (!acl)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(acl))
+ return acl;
}
hci_conn_hold(acl);
@@ -1655,9 +1691,9 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER);
- if (!sco) {
+ if (IS_ERR(sco)) {
hci_conn_drop(acl);
- return ERR_PTR(-ENOMEM);
+ return sco;
}
}
@@ -1722,34 +1758,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
+ DEFINE_FLEX(struct hci_cp_le_set_cig_params, pdu, cis, num_cis, 0x1f);
u8 cig_id = PTR_UINT(data);
struct hci_conn *conn;
struct bt_iso_qos *qos;
- struct iso_cig_params pdu;
+ u8 aux_num_cis = 0;
u8 cis_id;
conn = hci_conn_hash_lookup_cig(hdev, cig_id);
if (!conn)
return 0;
- memset(&pdu, 0, sizeof(pdu));
-
qos = &conn->iso_qos;
- pdu.cp.cig_id = cig_id;
- hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
- pdu.cp.sca = qos->ucast.sca;
- pdu.cp.packing = qos->ucast.packing;
- pdu.cp.framing = qos->ucast.framing;
- pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+ pdu->cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu->c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu->p_interval);
+ pdu->sca = qos->ucast.sca;
+ pdu->packing = qos->ucast.packing;
+ pdu->framing = qos->ucast.framing;
+ pdu->c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu->p_latency = cpu_to_le16(qos->ucast.in.latency);
/* Reprogram all CIS(s) with the same CIG, valid range are:
* num_cis: 0x00 to 0x1F
* cis_id: 0x00 to 0xEF
*/
for (cis_id = 0x00; cis_id < 0xf0 &&
- pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ aux_num_cis < pdu->num_cis; cis_id++) {
struct hci_cis_params *cis;
conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
@@ -1758,7 +1793,7 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
qos = &conn->iso_qos;
- cis = &pdu.cis[pdu.cp.num_cis++];
+ cis = &pdu->cis[aux_num_cis++];
cis->cis_id = cis_id;
cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
@@ -1769,14 +1804,14 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
cis->c_rtn = qos->ucast.out.rtn;
cis->p_rtn = qos->ucast.in.rtn;
}
+ pdu->num_cis = aux_num_cis;
- if (!pdu.cp.num_cis)
+ if (!pdu->num_cis)
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
- sizeof(pdu.cp) +
- pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
- HCI_CMD_TIMEOUT);
+ struct_size(pdu, cis, pdu->num_cis),
+ pdu, HCI_CMD_TIMEOUT);
}
static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
@@ -1847,8 +1882,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
qos->ucast.cis);
if (!cis) {
cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!cis)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(cis))
+ return cis;
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
@@ -1983,14 +2018,8 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
struct bt_iso_io_qos *qos, __u8 phy)
{
/* Only set MTU if PHY is enabled */
- if (!qos->sdu && qos->phy) {
- if (hdev->iso_mtu > 0)
- qos->sdu = hdev->iso_mtu;
- else if (hdev->le_mtu > 0)
- qos->sdu = hdev->le_mtu;
- else
- qos->sdu = hdev->acl_mtu;
- }
+ if (!qos->sdu && qos->phy)
+ qos->sdu = conn->mtu;
/* Use the same PHY as ACL if set to any */
if (qos->phy == BT_ISO_PHY_ANY)
@@ -2059,18 +2088,31 @@ static int create_pa_sync(struct hci_dev *hdev, void *data)
return hci_update_passive_scan_sync(hdev);
}
-int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
- __u8 sid, struct bt_iso_qos *qos)
+struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos)
{
struct hci_cp_le_pa_create_sync *cp;
+ struct hci_conn *conn;
+ int err;
if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+
+ conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
+ if (IS_ERR(conn))
+ return conn;
+
+ conn->iso_qos = *qos;
+ conn->state = BT_LISTEN;
+
+ hci_conn_hold(conn);
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp) {
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- return -ENOMEM;
+ hci_conn_drop(conn);
+ return ERR_PTR(-ENOMEM);
}
cp->options = qos->bcast.options;
@@ -2082,20 +2124,24 @@ int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
cp->sync_cte_type = qos->bcast.sync_cte_type;
/* Queue start pa_create_sync and scan */
- return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
+ err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
+ if (err < 0) {
+ hci_conn_drop(conn);
+ kfree(cp);
+ return ERR_PTR(err);
+ }
+
+ return conn;
}
int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
- struct _packed {
- struct hci_cp_le_big_create_sync cp;
- __u8 bis[0x11];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
int err;
- if (num_bis < 0x01 || num_bis > sizeof(pdu.bis))
+ if (num_bis < 0x01 || num_bis > pdu->num_bis)
return -EINVAL;
err = qos_set_big(hdev, qos);
@@ -2105,18 +2151,17 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
if (hcon)
hcon->iso_qos.bcast.big = qos->bcast.big;
- memset(&pdu, 0, sizeof(pdu));
- pdu.cp.handle = qos->bcast.big;
- pdu.cp.sync_handle = cpu_to_le16(sync_handle);
- pdu.cp.encryption = qos->bcast.encryption;
- memcpy(pdu.cp.bcode, qos->bcast.bcode, sizeof(pdu.cp.bcode));
- pdu.cp.mse = qos->bcast.mse;
- pdu.cp.timeout = cpu_to_le16(qos->bcast.timeout);
- pdu.cp.num_bis = num_bis;
- memcpy(pdu.bis, bis, num_bis);
+ pdu->handle = qos->bcast.big;
+ pdu->sync_handle = cpu_to_le16(sync_handle);
+ pdu->encryption = qos->bcast.encryption;
+ memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode));
+ pdu->mse = qos->bcast.mse;
+ pdu->timeout = cpu_to_le16(qos->bcast.timeout);
+ pdu->num_bis = num_bis;
+ memcpy(pdu->bis, bis, num_bis);
return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- sizeof(pdu.cp) + num_bis, &pdu);
+ struct_size(pdu, bis, num_bis), pdu);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0592369579..6ecb110bf4 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -63,50 +63,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-static int hci_scan_req(struct hci_request *req, unsigned long opt)
-{
- __u8 scan = opt;
-
- BT_DBG("%s %x", req->hdev->name, scan);
-
- /* Inquiry and Page scans */
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
- return 0;
-}
-
-static int hci_auth_req(struct hci_request *req, unsigned long opt)
-{
- __u8 auth = opt;
-
- BT_DBG("%s %x", req->hdev->name, auth);
-
- /* Authentication */
- hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
- return 0;
-}
-
-static int hci_encrypt_req(struct hci_request *req, unsigned long opt)
-{
- __u8 encrypt = opt;
-
- BT_DBG("%s %x", req->hdev->name, encrypt);
-
- /* Encryption */
- hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
- return 0;
-}
-
-static int hci_linkpol_req(struct hci_request *req, unsigned long opt)
-{
- __le16 policy = cpu_to_le16(opt);
-
- BT_DBG("%s %x", req->hdev->name, policy);
-
- /* Default link policy */
- hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
- return 0;
-}
-
/* Get HCI device by index.
* Device is held on return. */
struct hci_dev *hci_dev_get(int index)
@@ -149,8 +105,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
{
int old_state = hdev->discovery.state;
- BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state);
-
if (old_state == state)
return;
@@ -173,6 +127,8 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
case DISCOVERY_STOPPING:
break;
}
+
+ bt_dev_dbg(hdev, "state %u -> %u", old_state, state);
}
void hci_inquiry_cache_flush(struct hci_dev *hdev)
@@ -395,11 +351,6 @@ int hci_inquiry(void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -733,6 +684,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
struct hci_dev *hdev;
struct hci_dev_req dr;
+ __le16 policy;
int err = 0;
if (copy_from_user(&dr, arg, sizeof(dr)))
@@ -752,11 +704,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -764,8 +711,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -776,19 +723,21 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev,
+ HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
if (err)
break;
}
- err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
/* Ensure that the connectable and discoverable states
* get correctly modified as this was a non-mgmt change.
@@ -798,8 +747,10 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
break;
case HCISETLINKPOL:
- err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ policy = cpu_to_le16(dr.dev_opt);
+
+ err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
+ 2, &policy, HCI_CMD_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -910,7 +861,7 @@ int hci_get_dev_info(void __user *arg)
strscpy(di.name, hdev->name, sizeof(di.name));
di.bdaddr = hdev->bdaddr;
- di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
+ di.type = (hdev->bus & 0x0f);
di.flags = flags;
di.pkt_type = hdev->pkt_type;
if (lmp_bredr_capable(hdev)) {
@@ -940,20 +891,51 @@ int hci_get_dev_info(void __user *arg)
/* ---- Interface to HCI drivers ---- */
+static int hci_dev_do_poweroff(struct hci_dev *hdev)
+{
+ int err;
+
+ BT_DBG("%s %p", hdev->name, hdev);
+
+ hci_req_sync_lock(hdev);
+
+ err = hci_set_powered_sync(hdev, false);
+
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+
static int hci_rfkill_set_block(void *data, bool blocked)
{
struct hci_dev *hdev = data;
+ int err;
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
+ if (blocked == hci_dev_test_flag(hdev, HCI_RFKILLED))
+ return 0;
+
if (blocked) {
hci_dev_set_flag(hdev, HCI_RFKILLED);
+
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
- !hci_dev_test_flag(hdev, HCI_CONFIG))
- hci_dev_do_close(hdev);
+ !hci_dev_test_flag(hdev, HCI_CONFIG)) {
+ err = hci_dev_do_poweroff(hdev);
+ if (err) {
+ bt_dev_err(hdev, "Error when powering off device on rfkill (%d)",
+ err);
+
+ /* Make sure the device is still closed even if
+ * anything during power off sequence (eg.
+ * disconnecting devices) failed.
+ */
+ hci_dev_do_close(hdev);
+ }
+ }
} else {
hci_dev_clear_flag(hdev, HCI_RFKILLED);
}
@@ -995,8 +977,7 @@ static void hci_power_on(struct work_struct *work)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_do_close(hdev);
@@ -1738,6 +1719,15 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
adv->pending = true;
adv->instance = instance;
+
+ /* If controller support only one set and the instance is set to
+ * 1 then there is no option other than using handle 0x00.
+ */
+ if (hdev->le_num_of_adv_sets == 1 && instance == 1)
+ adv->handle = 0x00;
+ else
+ adv->handle = instance;
+
list_add(&adv->list, &hdev->adv_instances);
hdev->adv_instance_cnt++;
}
@@ -2492,16 +2482,16 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_adv_channel_map = 0x07;
hdev->le_adv_min_interval = 0x0800;
hdev->le_adv_max_interval = 0x0800;
- hdev->le_scan_interval = 0x0060;
- hdev->le_scan_window = 0x0030;
- hdev->le_scan_int_suspend = 0x0400;
- hdev->le_scan_window_suspend = 0x0012;
+ hdev->le_scan_interval = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_suspend = DISCOV_LE_SCAN_INT_SLOW1;
+ hdev->le_scan_window_suspend = DISCOV_LE_SCAN_WIN_SLOW1;
hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
- hdev->le_scan_int_adv_monitor = 0x0060;
- hdev->le_scan_window_adv_monitor = 0x0030;
- hdev->le_scan_int_connect = 0x0060;
- hdev->le_scan_window_connect = 0x0060;
+ hdev->le_scan_int_adv_monitor = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window_adv_monitor = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_connect = DISCOV_LE_SCAN_INT_CONN;
+ hdev->le_scan_window_connect = DISCOV_LE_SCAN_WIN_CONN;
hdev->le_conn_min_interval = 0x0018;
hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
@@ -2518,7 +2508,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION;
- hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT;
+ hdev->def_le_autoconnect_timeout = HCI_LE_CONN_TIMEOUT;
hdev->min_le_tx_power = HCI_TX_POWER_INVALID;
hdev->max_le_tx_power = HCI_TX_POWER_INVALID;
@@ -2604,20 +2594,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (!hdev->open || !hdev->close || !hdev->send)
return -EINVAL;
- /* Do not allow HCI_AMP devices to register at index 0,
- * so the index can be used as the AMP controller ID.
- */
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
- break;
- case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
- break;
- default:
- return -EINVAL;
- }
-
+ id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
if (id < 0)
return id;
@@ -2669,12 +2646,10 @@ int hci_register_dev(struct hci_dev *hdev)
hci_dev_set_flag(hdev, HCI_SETUP);
hci_dev_set_flag(hdev, HCI_AUTO_OFF);
- if (hdev->dev_type == HCI_PRIMARY) {
- /* Assume BR/EDR support until proven otherwise (such as
- * through reading supported features during init.
- */
- hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
- }
+ /* Assume BR/EDR support until proven otherwise (such as
+ * through reading supported features during init.
+ */
+ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
write_lock(&hci_dev_list_lock);
list_add(&hdev->list, &hci_dev_list);
@@ -2711,7 +2686,7 @@ err_wqueue:
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
err:
- ida_simple_remove(&hci_index_ida, hdev->id);
+ ida_free(&hci_index_ida, hdev->id);
return error;
}
@@ -2730,14 +2705,16 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->rx_work);
+ cancel_work_sync(&hdev->cmd_work);
+ cancel_work_sync(&hdev->tx_work);
cancel_work_sync(&hdev->power_on);
+ cancel_work_sync(&hdev->error_reset);
hci_cmd_sync_clear(hdev);
hci_unregister_suspend_notifier(hdev);
- msft_unregister(hdev);
-
hci_dev_do_close(hdev);
if (!test_bit(HCI_INIT, &hdev->flags) &&
@@ -2791,10 +2768,11 @@ void hci_release_dev(struct hci_dev *hdev)
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
hci_codec_list_clear(&hdev->local_codecs);
+ msft_release(hdev);
hci_dev_unlock(hdev);
ida_destroy(&hdev->unset_handle_ida);
- ida_simple_remove(&hci_index_ida, hdev->id);
+ ida_free(&hci_index_ida, hdev->id);
kfree_skb(hdev->sent_cmd);
kfree_skb(hdev->req_skb);
kfree_skb(hdev->recv_event);
@@ -3211,17 +3189,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hci_add_acl_hdr(skb, conn->handle, flags);
- break;
- case HCI_AMP:
- hci_add_acl_hdr(skb, chan->handle, flags);
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- return;
- }
+ hci_add_acl_hdr(skb, conn->handle, flags);
list = skb_shinfo(skb)->frag_list;
if (!list) {
@@ -3381,9 +3349,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
case ACL_LINK:
cnt = hdev->acl_cnt;
break;
- case AMP_LINK:
- cnt = hdev->block_cnt;
- break;
case SCO_LINK:
case ESCO_LINK:
cnt = hdev->sco_cnt;
@@ -3581,12 +3546,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type)
}
-static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
-{
- /* Calculate count of blocks used by this packet */
- return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
-}
-
static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
{
unsigned long last_tx;
@@ -3700,81 +3659,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
hci_prio_recalculate(hdev, ACL_LINK);
}
-static void hci_sched_acl_blk(struct hci_dev *hdev)
-{
- unsigned int cnt = hdev->block_cnt;
- struct hci_chan *chan;
- struct sk_buff *skb;
- int quote;
- u8 type;
-
- BT_DBG("%s", hdev->name);
-
- if (hdev->dev_type == HCI_AMP)
- type = AMP_LINK;
- else
- type = ACL_LINK;
-
- __check_timeout(hdev, cnt, type);
-
- while (hdev->block_cnt > 0 &&
- (chan = hci_chan_sent(hdev, type, &quote))) {
- u32 priority = (skb_peek(&chan->data_q))->priority;
- while (quote > 0 && (skb = skb_peek(&chan->data_q))) {
- int blocks;
-
- BT_DBG("chan %p skb %p len %d priority %u", chan, skb,
- skb->len, skb->priority);
-
- /* Stop if priority has changed */
- if (skb->priority < priority)
- break;
-
- skb = skb_dequeue(&chan->data_q);
-
- blocks = __get_blocks(hdev, skb);
- if (blocks > hdev->block_cnt)
- return;
-
- hci_conn_enter_active_mode(chan->conn,
- bt_cb(skb)->force_active);
-
- hci_send_frame(hdev, skb);
- hdev->acl_last_tx = jiffies;
-
- hdev->block_cnt -= blocks;
- quote -= blocks;
-
- chan->sent += blocks;
- chan->conn->sent += blocks;
- }
- }
-
- if (cnt != hdev->block_cnt)
- hci_prio_recalculate(hdev, type);
-}
-
static void hci_sched_acl(struct hci_dev *hdev)
{
BT_DBG("%s", hdev->name);
/* No ACL link over BR/EDR controller */
- if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY)
- return;
-
- /* No AMP link over AMP controller */
- if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP)
+ if (!hci_conn_num(hdev, ACL_LINK))
return;
- switch (hdev->flow_ctl_mode) {
- case HCI_FLOW_CTL_MODE_PACKET_BASED:
- hci_sched_acl_pkt(hdev);
- break;
-
- case HCI_FLOW_CTL_MODE_BLOCK_BASED:
- hci_sched_acl_blk(hdev);
- break;
- }
+ hci_sched_acl_pkt(hdev);
}
static void hci_sched_le(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 9d1063c51e..a78f6d706c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -913,21 +913,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_flow_control_mode *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->flow_ctl_mode = rp->mode;
-
- return rp->status;
-}
-
static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -954,6 +939,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->sco_mtu, hdev->sco_pkts);
+ if (!hdev->acl_mtu || !hdev->acl_pkts)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1068,28 +1056,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_data_block_size *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->block_mtu = __le16_to_cpu(rp->max_acl_len);
- hdev->block_len = __le16_to_cpu(rp->block_len);
- hdev->num_blocks = __le16_to_cpu(rp->num_blocks);
-
- hdev->block_cnt = hdev->num_blocks;
-
- BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
- hdev->block_cnt, hdev->block_len);
-
- return rp->status;
-}
-
static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1124,30 +1090,6 @@ unlock:
return rp->status;
}
-static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_local_amp_info *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->amp_status = rp->amp_status;
- hdev->amp_total_bw = __le32_to_cpu(rp->total_bw);
- hdev->amp_max_bw = __le32_to_cpu(rp->max_bw);
- hdev->amp_min_latency = __le32_to_cpu(rp->min_latency);
- hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu);
- hdev->amp_type = rp->amp_type;
- hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap);
- hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size);
- hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
- hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
-
- return rp->status;
-}
-
static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1263,6 +1205,9 @@ static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1777,10 +1722,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
switch (enable) {
case LE_SCAN_ENABLE:
hci_dev_set_flag(hdev, HCI_LE_SCAN);
- if (hdev->le_scan_type == LE_SCAN_ACTIVE)
+ if (hdev->le_scan_type == LE_SCAN_ACTIVE) {
clear_pending_adv_report(hdev);
- if (hci_dev_test_flag(hdev, HCI_MESH))
hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+ }
break;
case LE_SCAN_DISABLE:
@@ -2342,8 +2287,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
if (!conn) {
conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr,
HCI_ROLE_MASTER);
- if (!conn)
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn))
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
}
}
@@ -3154,8 +3099,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
BDADDR_BREDR)) {
conn = hci_conn_add_unset(hdev, ev->link_type,
&ev->bdaddr, HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new conn");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
} else {
@@ -3343,8 +3288,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
if (!conn) {
conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
}
@@ -3821,6 +3766,9 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -4112,12 +4060,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type,
sizeof(struct hci_rp_read_page_scan_type)),
HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type),
- HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size,
- sizeof(struct hci_rp_read_data_block_size)),
- HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode,
- sizeof(struct hci_rp_read_flow_control_mode)),
- HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info,
- sizeof(struct hci_rp_read_local_amp_info)),
HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock,
sizeof(struct hci_rp_read_clock)),
HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size,
@@ -4308,7 +4250,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
/* Remove connection if command failed */
- for (i = 0; cp->num_cis; cp->num_cis--, i++) {
+ for (i = 0; i < cp->num_cis; i++) {
struct hci_conn *conn;
u16 handle;
@@ -4324,6 +4266,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_conn_del(conn);
}
}
+ cp->num_cis = 0;
if (pending)
hci_le_create_cis_pending(hdev);
@@ -4452,11 +4395,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, handles, ev->num)))
return;
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
- return;
- }
-
bt_dev_dbg(hdev, "num %d", ev->num);
for (i = 0; i < ev->num; i++) {
@@ -4524,78 +4462,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
queue_work(hdev->workqueue, &hdev->tx_work);
}
-static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
- __u16 handle)
-{
- struct hci_chan *chan;
-
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- return hci_conn_hash_lookup_handle(hdev, handle);
- case HCI_AMP:
- chan = hci_chan_lookup_handle(hdev, handle);
- if (chan)
- return chan->conn;
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- break;
- }
-
- return NULL;
-}
-
-static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_num_comp_blocks *ev = data;
- int i;
-
- if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS,
- flex_array_size(ev, handles, ev->num_hndl)))
- return;
-
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d",
- hdev->flow_ctl_mode);
- return;
- }
-
- bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks,
- ev->num_hndl);
-
- for (i = 0; i < ev->num_hndl; i++) {
- struct hci_comp_blocks_info *info = &ev->handles[i];
- struct hci_conn *conn = NULL;
- __u16 handle, block_count;
-
- handle = __le16_to_cpu(info->handle);
- block_count = __le16_to_cpu(info->blocks);
-
- conn = __hci_conn_lookup_handle(hdev, handle);
- if (!conn)
- continue;
-
- conn->sent -= block_count;
-
- switch (conn->type) {
- case ACL_LINK:
- case AMP_LINK:
- hdev->block_cnt += block_count;
- if (hdev->block_cnt > hdev->num_blocks)
- hdev->block_cnt = hdev->num_blocks;
- break;
-
- default:
- bt_dev_err(hdev, "unknown type %d conn %p",
- conn->type, conn);
- break;
- }
- }
-
- queue_work(hdev->workqueue, &hdev->tx_work);
-}
-
static void hci_mode_change_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -5688,150 +5554,6 @@ unlock:
hci_dev_unlock(hdev);
}
-#if IS_ENABLED(CONFIG_BT_HS)
-static void hci_chan_selected_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_channel_selected *ev = data;
- struct hci_conn *hcon;
-
- bt_dev_dbg(hdev, "handle 0x%2.2x", ev->phy_handle);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- return;
-
- amp_read_loc_assoc_final_data(hdev, hcon);
-}
-
-static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_phy_link_complete *ev = data;
- struct hci_conn *hcon, *bredr_hcon;
-
- bt_dev_dbg(hdev, "handle 0x%2.2x status 0x%2.2x", ev->phy_handle,
- ev->status);
-
- hci_dev_lock(hdev);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- goto unlock;
-
- if (!hcon->amp_mgr)
- goto unlock;
-
- if (ev->status) {
- hci_conn_del(hcon);
- goto unlock;
- }
-
- bredr_hcon = hcon->amp_mgr->l2cap_conn->hcon;
-
- hcon->state = BT_CONNECTED;
- bacpy(&hcon->dst, &bredr_hcon->dst);
-
- hci_conn_hold(hcon);
- hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
- hci_conn_drop(hcon);
-
- hci_debugfs_create_conn(hcon);
- hci_conn_add_sysfs(hcon);
-
- amp_physical_cfm(bredr_hcon, hcon);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
-static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_logical_link_complete *ev = data;
- struct hci_conn *hcon;
- struct hci_chan *hchan;
- struct amp_mgr *mgr;
-
- bt_dev_dbg(hdev, "log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x",
- le16_to_cpu(ev->handle), ev->phy_handle, ev->status);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- return;
-
- /* Create AMP hchan */
- hchan = hci_chan_create(hcon);
- if (!hchan)
- return;
-
- hchan->handle = le16_to_cpu(ev->handle);
- hchan->amp = true;
-
- BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan);
-
- mgr = hcon->amp_mgr;
- if (mgr && mgr->bredr_chan) {
- struct l2cap_chan *bredr_chan = mgr->bredr_chan;
-
- l2cap_chan_lock(bredr_chan);
-
- bredr_chan->conn->mtu = hdev->block_mtu;
- l2cap_logical_cfm(bredr_chan, hchan, 0);
- hci_conn_hold(hcon);
-
- l2cap_chan_unlock(bredr_chan);
- }
-}
-
-static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_disconn_logical_link_complete *ev = data;
- struct hci_chan *hchan;
-
- bt_dev_dbg(hdev, "handle 0x%4.4x status 0x%2.2x",
- le16_to_cpu(ev->handle), ev->status);
-
- if (ev->status)
- return;
-
- hci_dev_lock(hdev);
-
- hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
- if (!hchan || !hchan->amp)
- goto unlock;
-
- amp_destroy_logical_link(hchan, ev->reason);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
-static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_disconn_phy_link_complete *ev = data;
- struct hci_conn *hcon;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
-
- if (ev->status)
- return;
-
- hci_dev_lock(hdev);
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon && hcon->type == AMP_LINK) {
- hcon->state = BT_CLOSED;
- hci_disconn_cfm(hcon, ev->reason);
- hci_conn_del(hcon);
- }
-
- hci_dev_unlock(hdev);
-}
-#endif
-
static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *local_rpa)
{
@@ -5912,8 +5634,8 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
goto unlock;
conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
@@ -6590,6 +6312,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
+
+ if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+ &hdev->quirks)) {
+ info->primary_phy &= 0x1f;
+ info->secondary_phy &= 0x1f;
+ }
+
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
@@ -6637,14 +6366,16 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
- if (ev->status) {
- /* Add connection to indicate the failed PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
+ /* Add connection to indicate PA sync event */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
- if (!pa_sync)
- goto unlock;
+ if (IS_ERR(pa_sync))
+ goto unlock;
+
+ pa_sync->sync_handle = le16_to_cpu(ev->handle);
+ if (ev->status) {
set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags);
/* Notify iso layer */
@@ -6661,6 +6392,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_per_adv_report *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *pa_sync;
bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
@@ -6668,8 +6400,28 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT))
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ goto unlock;
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+ pa_sync = hci_conn_hash_lookup_pa_sync_handle
+ (hdev,
+ le16_to_cpu(ev->sync_handle));
+
+ if (!pa_sync)
+ goto unlock;
+
+ if (ev->data_status == LE_PA_DATA_COMPLETE &&
+ !test_and_set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags)) {
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, 0);
+
+ /* Notify MGMT layer */
+ mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ }
+
+unlock:
hci_dev_unlock(hdev);
}
@@ -6916,6 +6668,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct bt_iso_qos *qos;
bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
+ u32 c_sdu_interval, p_sdu_interval;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6940,12 +6693,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
- /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
- qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
- qos->ucast.out.interval = qos->ucast.in.interval;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 6, Part G
+ * page 3075:
+ * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) ×
+ * ISO_Interval + SDU_Interval_C_To_P
+ * ...
+ * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) -
+ * Transport_Latency
+ */
+ c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->c_latency);
+ p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->p_latency);
switch (conn->role) {
case HCI_ROLE_SLAVE:
+ qos->ucast.in.interval = c_sdu_interval;
+ qos->ucast.out.interval = p_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.in.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6959,6 +6725,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos->ucast.out.phy = ev->p_phy;
break;
case HCI_ROLE_MASTER:
+ qos->ucast.in.interval = p_sdu_interval;
+ qos->ucast.out.interval = c_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.out.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -7042,7 +6810,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
if (!cis) {
cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE,
cis_handle);
- if (!cis) {
+ if (IS_ERR(cis)) {
hci_le_reject_cis(hdev, ev->cis_handle);
goto unlock;
}
@@ -7149,9 +6917,13 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis = hci_conn_hash_lookup_handle(hdev, handle);
if (!bis) {
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_dbg(hdev, "ignore too large handle %u", handle);
+ continue;
+ }
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE, handle);
- if (!bis)
+ if (IS_ERR(bis))
continue;
}
@@ -7181,6 +6953,8 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
u16 handle = le16_to_cpu(ev->bis[i]);
bis = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!bis)
+ continue;
set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
hci_connect_cfm(bis, ev->status);
@@ -7202,10 +6976,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT)) {
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ if (!(mask & HCI_LM_ACCEPT))
goto unlock;
- }
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
@@ -7214,24 +6986,13 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
(hdev,
le16_to_cpu(ev->sync_handle));
- if (pa_sync)
- goto unlock;
-
- /* Add connection to indicate the PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
-
if (!pa_sync)
goto unlock;
- pa_sync->sync_handle = le16_to_cpu(ev->sync_handle);
- set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags);
+ pa_sync->iso_qos.bcast.encryption = ev->encryption;
/* Notify iso layer */
- hci_connect_cfm(pa_sync, 0x00);
-
- /* Notify MGMT layer */
- mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ hci_connect_cfm(pa_sync, 0);
unlock:
hci_dev_unlock(hdev);
@@ -7645,28 +7406,6 @@ static const struct hci_ev {
/* [0x3e = HCI_EV_LE_META] */
HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt,
sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE),
-#if IS_ENABLED(CONFIG_BT_HS)
- /* [0x40 = HCI_EV_PHY_LINK_COMPLETE] */
- HCI_EV(HCI_EV_PHY_LINK_COMPLETE, hci_phy_link_complete_evt,
- sizeof(struct hci_ev_phy_link_complete)),
- /* [0x41 = HCI_EV_CHANNEL_SELECTED] */
- HCI_EV(HCI_EV_CHANNEL_SELECTED, hci_chan_selected_evt,
- sizeof(struct hci_ev_channel_selected)),
- /* [0x42 = HCI_EV_DISCONN_PHY_LINK_COMPLETE] */
- HCI_EV(HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE,
- hci_disconn_loglink_complete_evt,
- sizeof(struct hci_ev_disconn_logical_link_complete)),
- /* [0x45 = HCI_EV_LOGICAL_LINK_COMPLETE] */
- HCI_EV(HCI_EV_LOGICAL_LINK_COMPLETE, hci_loglink_complete_evt,
- sizeof(struct hci_ev_logical_link_complete)),
- /* [0x46 = HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE] */
- HCI_EV(HCI_EV_DISCONN_PHY_LINK_COMPLETE,
- hci_disconn_phylink_complete_evt,
- sizeof(struct hci_ev_disconn_phy_link_complete)),
-#endif
- /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */
- HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
- sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 0be75cf0ef..c91f2838f5 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -29,10 +29,6 @@
#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
-#define HCI_REQ_DONE 0
-#define HCI_REQ_PEND 1
-#define HCI_REQ_CANCELED 2
-
struct hci_request {
struct hci_dev *hdev;
struct sk_buff_head cmd_q;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 3f5f093233..69c2ba1e84 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -101,7 +101,7 @@ static bool hci_sock_gen_cookie(struct sock *sk)
int id = hci_pi(sk)->cookie;
if (!id) {
- id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL);
+ id = ida_alloc_min(&sock_cookie_ida, 1, GFP_KERNEL);
if (id < 0)
id = 0xffffffff;
@@ -119,7 +119,7 @@ static void hci_sock_free_cookie(struct sock *sk)
if (id) {
hci_pi(sk)->cookie = 0xffffffff;
- ida_simple_remove(&sock_cookie_ida, id);
+ ida_free(&sock_cookie_ida, id);
}
}
@@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
return NULL;
ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
- ni->type = hdev->dev_type;
+ ni->type = 0x00; /* Old hdev->dev_type */
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
@@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
return -EOPNOTSUPP;
- if (hdev->dev_type != HCI_PRIMARY)
- return -EOPNOTSUPP;
-
switch (cmd) {
case HCISETRAW:
if (!capable(CAP_NET_ADMIN))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 40b71bc505..4e90bd722e 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -280,6 +280,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
}
EXPORT_SYMBOL(__hci_cmd_sync_status);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout);
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+EXPORT_SYMBOL(hci_cmd_sync_status);
+
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
@@ -358,8 +371,6 @@ static void le_scan_disable(struct work_struct *work)
goto _return;
}
- hdev->discovery.scan_start = 0;
-
/* If we were running LE only scan, change discovery state. If
* we were running both LE and BR/EDR inquiry simultaneously,
* and BR/EDR inquiry is already finished, stop discovery,
@@ -1043,11 +1054,10 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
struct hci_cp_ext_adv_set *set;
u8 data[sizeof(*cp) + sizeof(*set) * 1];
u8 size;
+ struct adv_info *adv = NULL;
/* If request specifies an instance that doesn't exist, fail */
if (instance > 0) {
- struct adv_info *adv;
-
adv = hci_find_adv_instance(hdev, instance);
if (!adv)
return -EINVAL;
@@ -1066,7 +1076,7 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp->num_of_sets = !!instance;
cp->enable = 0x00;
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets;
@@ -1195,7 +1205,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.own_addr_type = own_addr_type;
cp.channel_map = hdev->le_adv_channel_map;
- cp.handle = instance;
+ cp.handle = adv ? adv->handle : instance;
if (flags & MGMT_ADV_FLAG_SEC_2M) {
cp.primary_phy = HCI_ADV_PHY_1M;
@@ -1235,31 +1245,27 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_scan_rsp_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->scan_rsp_changed)
return 0;
}
- len = eir_create_scan_rsp(hdev, instance, pdu.data);
+ len = eir_create_scan_rsp(hdev, instance, pdu->data);
- pdu.cp.handle = instance;
- pdu.cp.length = len;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->length = len;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1267,7 +1273,7 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->scan_rsp_changed = false;
} else {
- memcpy(hdev->scan_rsp_data, pdu.data, len);
+ memcpy(hdev->scan_rsp_data, pdu->data, len);
hdev->scan_rsp_data_len = len;
}
@@ -1335,7 +1341,7 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance)
memset(set, 0, sizeof(*set));
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
/* Set duration per instance since controller is responsible for
* scheduling it.
@@ -1411,29 +1417,25 @@ static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance,
static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_per_adv_data cp;
- u8 data[HCI_MAX_PER_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length,
+ HCI_MAX_PER_AD_LENGTH);
u8 len;
-
- memset(&pdu, 0, sizeof(pdu));
+ struct adv_info *adv = NULL;
if (instance) {
- struct adv_info *adv = hci_find_adv_instance(hdev, instance);
-
+ adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->periodic)
return 0;
}
- len = eir_create_per_adv_data(hdev, instance, pdu.data);
+ len = eir_create_per_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
}
@@ -1727,31 +1729,27 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_adv_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->adv_data_changed)
return 0;
}
- len = eir_create_adv_data(hdev, instance, pdu.data);
+ len = eir_create_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1760,7 +1758,7 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->adv_data_changed = false;
} else {
- memcpy(hdev->adv_data, pdu.data, len);
+ memcpy(hdev->adv_data, pdu->data, len);
hdev->adv_data_len = len;
}
@@ -2569,6 +2567,16 @@ static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
return p;
}
+/* Clear LE Accept List */
+static int hci_le_clear_accept_list_sync(struct hci_dev *hdev)
+{
+ if (!(hdev->commands[26] & 0x80))
+ return 0;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL,
+ HCI_CMD_TIMEOUT);
+}
+
/* Device must not be scanning when updating the accept list.
*
* Update is done using the following sequence:
@@ -2617,6 +2625,31 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
goto done;
}
+ /* Force address filtering if PA Sync is in progress */
+ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
+ struct hci_cp_le_pa_create_sync *sent;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC);
+ if (sent) {
+ struct conn_params pa;
+
+ memset(&pa, 0, sizeof(pa));
+
+ bacpy(&pa.addr, &sent->addr);
+ pa.addr_type = sent->addr_type;
+
+ /* Clear first since there could be addresses left
+ * behind.
+ */
+ hci_le_clear_accept_list_sync(hdev);
+
+ num_entries = 1;
+ err = hci_le_add_accept_list_sync(hdev, &pa,
+ &num_entries);
+ goto done;
+ }
+ }
+
/* Go through the current accept list programmed into the
* controller one by one and check if that address is connected or is
* still in the list of pending connections or list of devices to
@@ -2896,6 +2929,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
*/
filter_policy = hci_update_accept_list_sync(hdev);
+ /* If suspended and filter_policy set to 0x00 (no acceptlist) then
+ * passive scanning cannot be started since that would require the host
+ * to be woken up to process the reports.
+ */
+ if (hdev->suspended && !filter_policy) {
+ /* Check if accept list is empty then there is no need to scan
+ * while suspended.
+ */
+ if (list_empty(&hdev->le_accept_list))
+ return 0;
+
+ * If there are devices in the accept_list that means some
+ * devices could not be programmed which in non-suspended case
+ * means filter_policy needs to be set to 0x00 so the host needs
+ * to filter, but since this is treating suspended case we
+ * can ignore device needing host to filter to allow devices in
+ * the acceptlist to be able to wakeup the system.
+ */
+ filter_policy = 0x01;
+ }
+
/* When the controller is using random resolvable addresses and
* with that having LE privacy enabled, then controllers with
* Extended Scanner Filter Policies support can now enable support
@@ -2918,6 +2972,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
} else if (hci_is_adv_monitoring(hdev)) {
window = hdev->le_scan_window_adv_monitor;
interval = hdev->le_scan_int_adv_monitor;
+
+ /* Disable duplicates filter when scanning for advertisement
+ * monitor for the following reasons.
+ *
+ * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm
+ * controllers ignore RSSI_Sampling_Period when the duplicates
+ * filter is enabled.
+ *
+ * For SW pattern filtering, when we're not doing interleaved
+ * scanning, it is necessary to disable duplicates filter,
+ * otherwise hosts can only receive one advertisement and it's
+ * impossible to know if a peer is still in range.
+ */
+ filter_dups = LE_SCAN_FILTER_DUP_DISABLE;
} else {
window = hdev->le_scan_window;
interval = hdev->le_scan_interval;
@@ -3488,10 +3556,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev)
/* Read Local Supported Features. */
static int hci_read_local_features_sync(struct hci_dev *hdev)
{
- /* Not all AMP controllers support this command */
- if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20))
- return 0;
-
return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES,
0, NULL, HCI_CMD_TIMEOUT);
}
@@ -3526,51 +3590,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev)
return 0;
}
-/* Read Local AMP Info */
-static int hci_read_local_amp_info_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Data Blk size */
-static int hci_read_data_block_size_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Flow Control Mode */
-static int hci_read_flow_control_mode_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Location Data */
-static int hci_read_location_data_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* AMP Controller init stage 1 command sequence */
-static const struct hci_init_stage amp_init1[] = {
- /* HCI_OP_READ_LOCAL_VERSION */
- HCI_INIT(hci_read_local_version_sync),
- /* HCI_OP_READ_LOCAL_COMMANDS */
- HCI_INIT(hci_read_local_cmds_sync),
- /* HCI_OP_READ_LOCAL_AMP_INFO */
- HCI_INIT(hci_read_local_amp_info_sync),
- /* HCI_OP_READ_DATA_BLOCK_SIZE */
- HCI_INIT(hci_read_data_block_size_sync),
- /* HCI_OP_READ_FLOW_CONTROL_MODE */
- HCI_INIT(hci_read_flow_control_mode_sync),
- /* HCI_OP_READ_LOCATION_DATA */
- HCI_INIT(hci_read_location_data_sync),
- {}
-};
-
static int hci_init1_sync(struct hci_dev *hdev)
{
int err;
@@ -3584,28 +3603,9 @@ static int hci_init1_sync(struct hci_dev *hdev)
return err;
}
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
- return hci_init_stage_sync(hdev, br_init1);
- case HCI_AMP:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
- return hci_init_stage_sync(hdev, amp_init1);
- default:
- bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
- break;
- }
-
- return 0;
+ return hci_init_stage_sync(hdev, br_init1);
}
-/* AMP Controller init stage 2 command sequence */
-static const struct hci_init_stage amp_init2[] = {
- /* HCI_OP_READ_LOCAL_FEATURES */
- HCI_INIT(hci_read_local_features_sync),
- {}
-};
-
/* Read Buffer Size (ACL mtu, max pkt, etc.) */
static int hci_read_buffer_size_sync(struct hci_dev *hdev)
{
@@ -3863,9 +3863,6 @@ static int hci_init2_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
- if (hdev->dev_type == HCI_AMP)
- return hci_init_stage_sync(hdev, amp_init2);
-
err = hci_init_stage_sync(hdev, hci_init2);
if (err)
return err;
@@ -4270,16 +4267,6 @@ static int hci_le_read_accept_list_size_sync(struct hci_dev *hdev)
0, NULL, HCI_CMD_TIMEOUT);
}
-/* Clear LE Accept List */
-static int hci_le_clear_accept_list_sync(struct hci_dev *hdev)
-{
- if (!(hdev->commands[26] & 0x80))
- return 0;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL,
- HCI_CMD_TIMEOUT);
-}
-
/* Read LE Resolving List Size */
static int hci_le_read_resolv_list_size_sync(struct hci_dev *hdev)
{
@@ -4703,13 +4690,6 @@ static int hci_init_sync(struct hci_dev *hdev)
if (err < 0)
return err;
- /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode
- * BR/EDR/LE type controllers. AMP controllers only need the
- * first two stages of init.
- */
- if (hdev->dev_type != HCI_PRIMARY)
- return 0;
-
err = hci_init3_sync(hdev);
if (err < 0)
return err;
@@ -4938,12 +4918,8 @@ int hci_dev_open_sync(struct hci_dev *hdev)
* In case of user channel usage, it is not important
* if a public address or static random address is
* available.
- *
- * This check is only valid for BR/EDR controllers
- * since AMP controllers do not have an address.
*/
if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hdev->dev_type == HCI_PRIMARY &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY)) {
ret = -EADDRNOTAVAIL;
@@ -4978,8 +4954,7 @@ int hci_dev_open_sync(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hci_dev_test_flag(hdev, HCI_MGMT) &&
- hdev->dev_type == HCI_PRIMARY) {
+ hci_dev_test_flag(hdev, HCI_MGMT)) {
ret = hci_powered_update_sync(hdev);
mgmt_power_on(hdev, ret);
}
@@ -5124,8 +5099,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
- if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_MGMT))
__mgmt_power_off(hdev);
@@ -5187,9 +5161,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hdev->flags &= BIT(HCI_RAW);
hci_dev_clear_volatile_flags(hdev);
- /* Controller radio is available but is currently powered down */
- hdev->amp_status = AMP_STATUS_POWERED_DOWN;
-
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
@@ -5226,8 +5197,7 @@ static int hci_power_on_sync(struct hci_dev *hdev)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_close_sync(hdev);
@@ -5329,27 +5299,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
return 0;
}
-static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle,
- u8 reason)
-{
- struct hci_cp_disconn_phy_link cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.phy_handle = HCI_PHY_HANDLE(handle);
- cp.reason = reason;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_disconnect cp;
- if (conn->type == AMP_LINK)
- return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
-
if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
/* This is a BIS connection, hci_conn_del will
* do the necessary cleanup.
@@ -5586,27 +5540,33 @@ static int hci_power_off_sync(struct hci_dev *hdev)
if (!test_bit(HCI_UP, &hdev->flags))
return 0;
+ hci_dev_set_flag(hdev, HCI_POWERING_DOWN);
+
if (test_bit(HCI_ISCAN, &hdev->flags) ||
test_bit(HCI_PSCAN, &hdev->flags)) {
err = hci_write_scan_enable_sync(hdev, 0x00);
if (err)
- return err;
+ goto out;
}
err = hci_clear_adv_sync(hdev, NULL, false);
if (err)
- return err;
+ goto out;
err = hci_stop_discovery_sync(hdev);
if (err)
- return err;
+ goto out;
/* Terminated due to Power Off */
err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
if (err)
- return err;
+ goto out;
- return hci_dev_close_sync(hdev);
+ err = hci_dev_close_sync(hdev);
+
+out:
+ hci_dev_clear_flag(hdev, HCI_POWERING_DOWN);
+ return err;
}
int hci_set_powered_sync(struct hci_dev *hdev, u8 val)
@@ -6462,10 +6422,8 @@ done:
int hci_le_create_cis_sync(struct hci_dev *hdev)
{
- struct {
- struct hci_cp_le_create_cis cp;
- struct hci_cis cis[0x1f];
- } cmd;
+ DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f);
+ size_t aux_num_cis = 0;
struct hci_conn *conn;
u8 cig = BT_ISO_QOS_CIG_UNSET;
@@ -6492,8 +6450,6 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
* remains pending.
*/
- memset(&cmd, 0, sizeof(cmd));
-
hci_dev_lock(hdev);
rcu_read_lock();
@@ -6530,7 +6486,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
goto done;
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ struct hci_cis *cis = &cmd->cis[aux_num_cis];
if (hci_conn_check_create_cis(conn) ||
conn->iso_qos.ucast.cig != cig)
@@ -6539,25 +6495,25 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
+ aux_num_cis++;
- if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ if (aux_num_cis >= cmd->num_cis)
break;
}
+ cmd->num_cis = aux_num_cis;
done:
rcu_read_unlock();
hci_dev_unlock(hdev);
- if (!cmd.cp.num_cis)
+ if (!aux_num_cis)
return 0;
/* Wait for HCI_LE_CIS_Established */
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS,
- sizeof(cmd.cp) + sizeof(cmd.cis[0]) *
- cmd.cp.num_cis, &cmd,
- HCI_EVT_LE_CIS_ESTABLISHED,
+ struct_size(cmd, cis, cmd->num_cis),
+ cmd, HCI_EVT_LE_CIS_ESTABLISHED,
conn->conn_timeout, NULL);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index fa6c2e95d5..398fb81f7a 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -3,7 +3,7 @@
* BlueZ - Bluetooth protocol stack for Linux
*
* Copyright (C) 2022 Intel Corporation
- * Copyright 2023 NXP
+ * Copyright 2023-2024 NXP
*/
#include <linux/module.h>
@@ -54,7 +54,6 @@ static void iso_sock_kill(struct sock *sk);
enum {
BT_SK_BIG_SYNC,
BT_SK_PA_SYNC,
- BT_SK_PA_SYNC_TERM,
};
struct iso_pinfo {
@@ -81,12 +80,14 @@ static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
static bool iso_match_sid(struct sock *sk, void *data);
static bool iso_match_sync_handle(struct sock *sk, void *data);
+static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data);
static void iso_sock_disconn(struct sock *sk);
typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data);
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data);
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
@@ -196,21 +197,10 @@ static void iso_chan_del(struct sock *sk, int err)
sock_set_flag(sk, SOCK_ZAPPED);
}
-static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
-{
- struct hci_conn *hcon = data;
-
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
- return false;
-
- return hcon->sync_handle == iso_pi(sk)->sync_handle;
-}
-
static void iso_conn_del(struct hci_conn *hcon, int err)
{
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
- struct sock *parent;
if (!conn)
return;
@@ -226,25 +216,6 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
if (sk) {
lock_sock(sk);
-
- /* While a PA sync hcon is in the process of closing,
- * mark parent socket with a flag, so that any residual
- * BIGInfo adv reports that arrive before PA sync is
- * terminated are not processed anymore.
- */
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_conn_sync_handle,
- hcon);
-
- if (parent) {
- set_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(parent)->flags);
- sock_put(parent);
- }
- }
-
iso_sock_clear_timer(sk);
iso_chan_del(sk, err);
release_sock(sk);
@@ -581,22 +552,23 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
return NULL;
}
-/* Find socket listening:
+/* Find socket in given state:
* source bdaddr (Unicast)
* destination bdaddr (Broadcast only)
* match func - pass NULL to ignore
* match func data - pass -1 to ignore
* Returns closest match.
*/
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data)
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data)
{
struct sock *sk = NULL, *sk1 = NULL;
read_lock(&iso_sk_list.lock);
sk_for_each(sk, &iso_sk_list.head) {
- if (sk->sk_state != BT_LISTEN)
+ if (sk->sk_state != state)
continue;
/* Match Broadcast destination */
@@ -690,11 +662,8 @@ static void iso_sock_cleanup_listen(struct sock *parent)
iso_sock_kill(sk);
}
- /* If listening socket stands for a PA sync connection,
- * properly disconnect the hcon and socket.
- */
- if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &iso_pi(parent)->conn->hcon->flags)) {
+ /* If listening socket has a hcon, properly disconnect it */
+ if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon) {
iso_sock_disconn(parent);
return;
}
@@ -860,6 +829,7 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
iso_pi(sk)->qos = default_qos;
+ iso_pi(sk)->sync_handle = -1;
bt_sock_link(&iso_sk_list, sk);
return sk;
@@ -907,7 +877,6 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
- iso_pi(sk)->sync_handle = -1;
if (sa->iso_bc->bc_sid > 0x0f)
return -EINVAL;
@@ -984,7 +953,8 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
/* Allow the user to bind a PA sync socket to a number
* of BISes to sync to.
*/
- if (sk->sk_state == BT_CONNECT2 &&
+ if ((sk->sk_state == BT_CONNECT2 ||
+ sk->sk_state == BT_CONNECTED) &&
test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
err = iso_sock_bind_pa_sk(sk, sa, addr_len);
goto done;
@@ -1076,6 +1046,8 @@ static int iso_listen_bis(struct sock *sk)
{
struct hci_dev *hdev;
int err = 0;
+ struct iso_conn *conn;
+ struct hci_conn *hcon;
BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src,
&iso_pi(sk)->dst, iso_pi(sk)->bc_sid);
@@ -1096,18 +1068,40 @@ static int iso_listen_bis(struct sock *sk)
if (!hdev)
return -EHOSTUNREACH;
+ hci_dev_lock(hdev);
+
/* Fail if user set invalid QoS */
if (iso_pi(sk)->qos_user_set && !check_bcast_qos(&iso_pi(sk)->qos)) {
iso_pi(sk)->qos = default_qos;
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
- err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
- le_addr_type(iso_pi(sk)->dst_type),
- iso_pi(sk)->bc_sid, &iso_pi(sk)->qos);
+ hcon = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ iso_pi(sk)->bc_sid, &iso_pi(sk)->qos);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
+
+ conn = iso_conn_add(hcon);
+ if (!conn) {
+ hci_conn_drop(hcon);
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = iso_chan_add(conn, sk, NULL);
+ if (err) {
+ hci_conn_drop(hcon);
+ goto unlock;
+ }
hci_dev_put(hdev);
+unlock:
+ hci_dev_unlock(hdev);
return err;
}
@@ -1165,7 +1159,7 @@ done:
}
static int iso_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -1174,7 +1168,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1264,7 +1258,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return -ENOTCONN;
}
- mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+ mtu = iso_pi(sk)->conn->hcon->mtu;
release_sock(sk);
@@ -1362,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- if (pi->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ if (test_bit(BT_SK_PA_SYNC, &pi->flags)) {
iso_conn_big_sync(sk);
sk->sk_state = BT_LISTEN;
} else {
@@ -1372,6 +1365,16 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
release_sock(sk);
return 0;
+ case BT_CONNECTED:
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ iso_conn_big_sync(sk);
+ sk->sk_state = BT_LISTEN;
+ release_sock(sk);
+ return 0;
+ }
+
+ release_sock(sk);
+ break;
case BT_CONNECT:
release_sock(sk);
return iso_connect_cis(sk);
@@ -1517,7 +1520,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
- sk->sk_state != BT_CONNECT2) {
+ sk->sk_state != BT_CONNECT2 &&
+ (!test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags) ||
+ sk->sk_state != BT_CONNECTED)) {
err = -EINVAL;
break;
}
@@ -1738,7 +1743,7 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev = NULL;
struct hci_ev_le_pa_sync_established *ev2 = NULL;
- struct hci_evt_le_big_info_adv_report *ev3 = NULL;
+ struct hci_ev_le_per_adv_report *ev3 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1756,32 +1761,37 @@ static void iso_conn_ready(struct iso_conn *conn)
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
/* Get reference to PA sync parent socket, if it exists */
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_pa_sync_flag, NULL);
+ parent = iso_get_sock(&hcon->src, &hcon->dst,
+ BT_LISTEN,
+ iso_match_pa_sync_flag,
+ NULL);
if (!parent && ev)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_big, ev);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_big, ev);
} else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
ev2 = hci_recv_event_data(hcon->hdev,
HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev2)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sid, ev2);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sid, ev2);
} else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) {
ev3 = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_INFO_ADV_REPORT);
+ HCI_EV_LE_PER_ADV_REPORT);
if (ev3)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sync_handle, ev3);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sync_handle_pa_report,
+ ev3);
}
if (!parent)
- parent = iso_get_sock_listen(&hcon->src,
- BDADDR_ANY, NULL, NULL);
+ parent = iso_get_sock(&hcon->src, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
if (!parent)
return;
@@ -1818,7 +1828,6 @@ static void iso_conn_ready(struct iso_conn *conn)
if (ev3) {
iso_pi(sk)->qos = iso_pi(parent)->qos;
- iso_pi(sk)->qos.bcast.encryption = ev3->encryption;
hcon->iso_qos = iso_pi(sk)->qos;
iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
@@ -1883,7 +1892,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
struct hci_evt_le_big_info_adv_report *ev2;
struct hci_ev_le_per_adv_report *ev3;
struct sock *sk;
- int lm = 0;
bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
@@ -1903,8 +1911,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
*/
ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev1) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
- ev1);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sid, ev1);
if (sk && !ev1->status)
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
@@ -1913,26 +1921,29 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
if (ev2) {
- /* Try to get PA sync listening socket, if it exists */
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_pa_sync_flag, NULL);
-
- if (!sk) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle, ev2);
-
- /* If PA Sync is in process of terminating,
- * do not handle any more BIGInfo adv reports.
- */
-
- if (sk && test_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(sk)->flags))
- return lm;
+ /* Check if BIGInfo report has already been handled */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECTED,
+ iso_match_sync_handle, ev2);
+ if (sk) {
+ sock_put(sk);
+ sk = NULL;
+ goto done;
}
+ /* Try to get PA sync socket, if it exists */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECT2,
+ iso_match_sync_handle, ev2);
+ if (!sk)
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr,
+ BT_LISTEN,
+ iso_match_sync_handle,
+ ev2);
+
if (sk) {
int err;
+ iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
+
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
@@ -1951,6 +1962,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
}
}
}
+
+ goto done;
}
ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
@@ -1959,8 +1972,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
u8 *base;
struct hci_conn *hcon;
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle_pa_report, ev3);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sync_handle_pa_report, ev3);
if (!sk)
goto done;
@@ -2009,21 +2022,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
hcon->le_per_adv_data_len = 0;
}
} else {
- sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
+ sk = iso_get_sock(&hdev->bdaddr, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
}
done:
if (!sk)
- return lm;
-
- lm |= HCI_LM_ACCEPT;
+ return 0;
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))
*flags |= HCI_PROTO_DEFER;
sock_put(sk);
- return lm;
+ return HCI_LM_ACCEPT;
}
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 84fc70862d..9988ba382b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -415,6 +415,9 @@ static void l2cap_chan_timeout(struct work_struct *work)
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+ if (!conn)
+ return;
+
mutex_lock(&conn->chan_lock);
/* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
* this work. No need to call l2cap_chan_hold(chan) here again.
@@ -454,6 +457,9 @@ struct l2cap_chan *l2cap_chan_create(void)
/* Set default lock nesting level */
atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL);
+ /* Available receive buffer space is initially unknown */
+ chan->rx_avail = -1;
+
write_lock(&chan_list_lock);
list_add(&chan->global_l, &chan_list);
write_unlock(&chan_list_lock);
@@ -535,6 +541,28 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
}
EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
+static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan)
+{
+ size_t sdu_len = chan->sdu ? chan->sdu->len : 0;
+
+ if (chan->mps == 0)
+ return 0;
+
+ /* If we don't know the available space in the receiver buffer, give
+ * enough credits for a full packet.
+ */
+ if (chan->rx_avail == -1)
+ return (chan->imtu / chan->mps) + 1;
+
+ /* If we know how much space is available in the receive buffer, give
+ * out as many credits as would fill the buffer.
+ */
+ if (chan->rx_avail <= sdu_len)
+ return 0;
+
+ return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps);
+}
+
static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
{
chan->sdu = NULL;
@@ -543,8 +571,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
chan->tx_credits = tx_credits;
/* Derive MPS from connection MTU to stop HCI fragmentation */
chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
- /* Give enough credits for a full packet */
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
skb_queue_head_init(&chan->tx_q);
}
@@ -556,7 +583,7 @@ static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits)
/* L2CAP implementations shall support a minimum MPS of 64 octets */
if (chan->mps < L2CAP_ECRED_MIN_MPS) {
chan->mps = L2CAP_ECRED_MIN_MPS;
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
}
}
@@ -1257,7 +1284,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
struct l2cap_ecred_conn_data {
struct {
- struct l2cap_ecred_conn_req req;
+ struct l2cap_ecred_conn_req_hdr req;
__le16 scid[5];
} __packed pdu;
struct l2cap_chan *chan;
@@ -3737,7 +3764,7 @@ static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data)
struct l2cap_ecred_rsp_data {
struct {
- struct l2cap_ecred_conn_rsp rsp;
+ struct l2cap_ecred_conn_rsp_hdr rsp;
__le16 scid[L2CAP_ECRED_MAX_CID];
} __packed pdu;
int count;
@@ -3746,6 +3773,8 @@ struct l2cap_ecred_rsp_data {
static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
{
struct l2cap_ecred_rsp_data *rsp = data;
+ struct l2cap_ecred_conn_rsp *rsp_flex =
+ container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr);
if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
return;
@@ -3755,7 +3784,7 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
/* Include all channels pending with the same ident */
if (!rsp->pdu.rsp.result)
- rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid);
+ rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid);
else
l2cap_chan_del(chan, ECONNRESET);
}
@@ -3902,13 +3931,12 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn,
return 0;
}
-static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u8 *data, u8 rsp_code, u8 amp_id)
+static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
+ u8 *data, u8 rsp_code)
{
struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
struct l2cap_conn_rsp rsp;
- struct l2cap_chan *chan = NULL, *pchan;
+ struct l2cap_chan *chan = NULL, *pchan = NULL;
int result, status = L2CAP_CS_NO_INFO;
u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3921,7 +3949,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
&conn->hcon->dst, ACL_LINK);
if (!pchan) {
result = L2CAP_CR_BAD_PSM;
- goto sendresp;
+ goto response;
}
mutex_lock(&conn->chan_lock);
@@ -3983,17 +4011,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
status = L2CAP_CS_AUTHOR_PEND;
chan->ops->defer(chan);
} else {
- /* Force pending result for AMP controllers.
- * The connection will succeed after the
- * physical link is up.
- */
- if (amp_id == AMP_ID_BREDR) {
- l2cap_state_change(chan, BT_CONFIG);
- result = L2CAP_CR_SUCCESS;
- } else {
- l2cap_state_change(chan, BT_CONNECT2);
- result = L2CAP_CR_PEND;
- }
+ l2cap_state_change(chan, BT_CONFIG);
+ result = L2CAP_CR_SUCCESS;
status = L2CAP_CS_NO_INFO;
}
} else {
@@ -4008,17 +4027,15 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
}
response:
- l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
- l2cap_chan_put(pchan);
-
-sendresp:
rsp.scid = cpu_to_le16(scid);
rsp.dcid = cpu_to_le16(dcid);
rsp.result = cpu_to_le16(result);
rsp.status = cpu_to_le16(status);
l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp);
+ if (!pchan)
+ return;
+
if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
struct l2cap_info_req info;
info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
@@ -4041,7 +4058,9 @@ sendresp:
chan->num_conf_req++;
}
- return chan;
+ l2cap_chan_unlock(pchan);
+ mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(pchan);
}
static int l2cap_connect_req(struct l2cap_conn *conn,
@@ -4058,7 +4077,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
- l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
+ l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP);
return 0;
}
@@ -4628,13 +4647,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- if (max > hcon->le_conn_max_interval) {
- BT_DBG("requested connection interval exceeds current bounds.");
- err = -EINVAL;
- } else {
- err = hci_check_conn_params(min, max, latency, to_multiplier);
- }
-
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
@@ -4994,10 +5007,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
u8 *data)
{
struct l2cap_ecred_conn_req *req = (void *) data;
- struct {
- struct l2cap_ecred_conn_rsp rsp;
- __le16 dcid[L2CAP_ECRED_MAX_CID];
- } __packed pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_conn_rsp, pdu, dcid, L2CAP_ECRED_MAX_CID);
struct l2cap_chan *chan, *pchan;
u16 mtu, mps;
__le16 psm;
@@ -5016,7 +5026,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
cmd_len -= sizeof(*req);
num_scid = cmd_len / sizeof(u16);
- if (num_scid > ARRAY_SIZE(pdu.dcid)) {
+ if (num_scid > L2CAP_ECRED_MAX_CID) {
result = L2CAP_CR_LE_INVALID_PARAMS;
goto response;
}
@@ -5045,7 +5055,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
- memset(&pdu, 0, sizeof(pdu));
+ memset(pdu, 0, sizeof(*pdu));
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
@@ -5071,8 +5081,8 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("scid[%d] 0x%4.4x", i, scid);
- pdu.dcid[i] = 0x0000;
- len += sizeof(*pdu.dcid);
+ pdu->dcid[i] = 0x0000;
+ len += sizeof(*pdu->dcid);
/* Check for valid dynamic CID range */
if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
@@ -5106,13 +5116,13 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
l2cap_ecred_init(chan, __le16_to_cpu(req->credits));
/* Init response */
- if (!pdu.rsp.credits) {
- pdu.rsp.mtu = cpu_to_le16(chan->imtu);
- pdu.rsp.mps = cpu_to_le16(chan->mps);
- pdu.rsp.credits = cpu_to_le16(chan->rx_credits);
+ if (!pdu->credits) {
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->credits = cpu_to_le16(chan->rx_credits);
}
- pdu.dcid[i] = cpu_to_le16(chan->scid);
+ pdu->dcid[i] = cpu_to_le16(chan->scid);
__set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
@@ -5134,13 +5144,13 @@ unlock:
l2cap_chan_put(pchan);
response:
- pdu.rsp.result = cpu_to_le16(result);
+ pdu->result = cpu_to_le16(result);
if (defer)
return 0;
l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP,
- sizeof(pdu.rsp) + len, &pdu);
+ sizeof(*pdu) + len, pdu);
return 0;
}
@@ -6239,7 +6249,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
chan->rx_state = L2CAP_RX_STATE_RECV;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
return l2cap_resegment(chan);
}
@@ -6306,7 +6316,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan,
*/
chan->next_tx_seq = control->reqseq;
chan->unacked_frames = 0;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
err = l2cap_resegment(chan);
@@ -6511,9 +6521,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
struct l2cap_le_credits pkt;
- u16 return_credits;
-
- return_credits = (chan->imtu / chan->mps) + 1;
+ u16 return_credits = l2cap_le_rx_credits(chan);
if (chan->rx_credits >= return_credits)
return;
@@ -6532,6 +6540,19 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt);
}
+void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail)
+{ /* Store the rx space reported for @chan; an unchanged value is a no-op. */
+ if (chan->rx_avail == rx_avail)
+ return;
+
+ BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail);
+
+ chan->rx_avail = rx_avail;
+ /* The credit sender is only invoked while the channel is connected. */
+ if (chan->state == BT_CONNECTED)
+ l2cap_chan_le_send_credits(chan);
+}
+
static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
{
int err;
@@ -6541,6 +6562,12 @@ static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
/* Wait recv to confirm reception before updating the credits */
err = chan->ops->recv(chan, skb);
+ if (err < 0 && chan->rx_avail != -1) {
+ BT_ERR("Queueing received LE L2CAP data failed");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+ return err;
+ }
+
/* Update credits whenever an SDU is received */
l2cap_chan_le_send_credits(chan);
@@ -6563,7 +6590,8 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
}
chan->rx_credits--;
- BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits);
+ BT_DBG("chan %p: rx_credits %u -> %u",
+ chan, chan->rx_credits + 1, chan->rx_credits);
/* Update if remote had run out of credits, this should only happens
* if the remote is not using the entire MPS.
@@ -6733,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
BT_DBG("chan %p, len %d", chan, skb->len);
+ l2cap_chan_lock(chan);
+
if (chan->state != BT_BOUND && chan->state != BT_CONNECTED)
goto drop;
@@ -6744,11 +6774,13 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
bt_cb(skb)->l2cap.psm = psm;
if (!chan->ops->recv(chan, skb)) {
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
return;
}
drop:
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
free_skb:
kfree_skb(skb);
@@ -6846,18 +6878,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
- switch (hcon->type) {
- case LE_LINK:
- if (hcon->hdev->le_mtu) {
- conn->mtu = hcon->hdev->le_mtu;
- break;
- }
- fallthrough;
- default:
- conn->mtu = hcon->hdev->acl_mtu;
- break;
- }
-
+ conn->mtu = hcon->mtu;
conn->feat_mask = 0;
conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS;
@@ -7111,14 +7132,11 @@ EXPORT_SYMBOL_GPL(l2cap_chan_connect);
static void l2cap_ecred_reconfigure(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
- struct {
- struct l2cap_ecred_reconf_req req;
- __le16 scid;
- } pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1);
- pdu.req.mtu = cpu_to_le16(chan->imtu);
- pdu.req.mps = cpu_to_le16(chan->mps);
- pdu.scid = cpu_to_le16(chan->scid);
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->scid[0] = cpu_to_le16(chan->scid);
chan->ident = l2cap_get_ident(conn);
@@ -7462,10 +7480,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
struct l2cap_conn *conn = hcon->l2cap_data;
int len;
- /* For AMP controller do not create l2cap conn */
- if (!conn && hcon->hdev->dev_type != HCI_PRIMARY)
- goto drop;
-
if (!conn)
conn = l2cap_conn_add(hcon);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5cc83f906c..ba437c6f6e 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -327,7 +327,7 @@ done:
}
static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -336,7 +336,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock_nested(sk, L2CAP_NESTING_PARENT);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1131,6 +1131,34 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return err;
}
+static void l2cap_publish_rx_avail(struct l2cap_chan *chan)
+{ /* Report the socket's free receive-buffer space to the channel. */
+ struct sock *sk = chan->data; /* dereferenced below without a NULL check */
+ ssize_t avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc);
+ int expected_skbs, skb_overhead;
+ /* Nothing left in the receive buffer (or it is over-committed): publish 0. */
+ if (avail <= 0) {
+ l2cap_chan_rx_avail(chan, 0);
+ return;
+ }
+ /* With mps == 0 the estimate below would divide by zero; publish -1 instead. */
+ if (!chan->mps) {
+ l2cap_chan_rx_avail(chan, -1);
+ return;
+ }
+
+ /* Correct available memory by estimated sk_buff overhead.
+ * This is significant due to small transfer sizes. However, accept
+ * at least one full packet if receive space is non-zero.
+ * NOTE(review): -1 looks like a "no usable estimate" sentinel; confirm against the rx_avail consumers.
+ */
+ expected_skbs = DIV_ROUND_UP(avail, chan->mps);
+ skb_overhead = expected_skbs * sizeof(struct sk_buff);
+ if (skb_overhead < avail)
+ l2cap_chan_rx_avail(chan, avail - skb_overhead);
+ else
+ l2cap_chan_rx_avail(chan, -1);
+}
+
static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1167,28 +1195,33 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
else
err = bt_sock_recvmsg(sock, msg, len, flags);
- if (pi->chan->mode != L2CAP_MODE_ERTM)
+ if (pi->chan->mode != L2CAP_MODE_ERTM &&
+ pi->chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ pi->chan->mode != L2CAP_MODE_EXT_FLOWCTL)
return err;
- /* Attempt to put pending rx data in the socket buffer */
-
lock_sock(sk);
- if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state))
- goto done;
+ l2cap_publish_rx_avail(pi->chan);
- if (pi->rx_busy_skb) {
- if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
- pi->rx_busy_skb = NULL;
- else
+ /* Attempt to put pending rx data in the socket buffer */
+ while (!list_empty(&pi->rx_busy)) {
+ struct l2cap_rx_busy *rx_busy =
+ list_first_entry(&pi->rx_busy,
+ struct l2cap_rx_busy,
+ list);
+ if (__sock_queue_rcv_skb(sk, rx_busy->skb) < 0)
goto done;
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
/* Restore data flow when half of the receive buffer is
* available. This avoids resending large numbers of
* frames.
*/
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
+ if (test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state) &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
l2cap_chan_busy(pi->chan, 0);
done:
@@ -1206,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
+ /* Sock is dead, so set chan data to NULL to prevent other tasks from
+ * using an invalid sock pointer.
+ */
+ l2cap_pi(sk)->chan->data = NULL;
/* Kill poor orphan */
l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1448,18 +1485,25 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct sock *sk = chan->data;
+ struct sock *sk;
+ struct l2cap_pinfo *pi;
int err;
- lock_sock(sk);
+ sk = chan->data;
+ if (!sk)
+ return -ENXIO;
- if (l2cap_pi(sk)->rx_busy_skb) {
+ pi = l2cap_pi(sk);
+ lock_sock(sk);
+ if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
err = -ENOMEM;
goto done;
}
if (chan->mode != L2CAP_MODE_ERTM &&
- chan->mode != L2CAP_MODE_STREAMING) {
+ chan->mode != L2CAP_MODE_STREAMING &&
+ chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ chan->mode != L2CAP_MODE_EXT_FLOWCTL) {
/* Even if no filter is attached, we could potentially
* get errors from security modules, etc.
*/
@@ -1470,7 +1514,9 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
err = __sock_queue_rcv_skb(sk, skb);
- /* For ERTM, handle one skb that doesn't fit into the recv
+ l2cap_publish_rx_avail(chan);
+
+ /* For ERTM and LE, handle a skb that doesn't fit into the recv
* buffer. This is important to do because the data frames
* have already been acked, so the skb cannot be discarded.
*
@@ -1479,8 +1525,18 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
* acked and reassembled until there is buffer space
* available.
*/
- if (err < 0 && chan->mode == L2CAP_MODE_ERTM) {
- l2cap_pi(sk)->rx_busy_skb = skb;
+ if (err < 0 &&
+ (chan->mode == L2CAP_MODE_ERTM ||
+ chan->mode == L2CAP_MODE_LE_FLOWCTL ||
+ chan->mode == L2CAP_MODE_EXT_FLOWCTL)) {
+ struct l2cap_rx_busy *rx_busy =
+ kmalloc(sizeof(*rx_busy), GFP_KERNEL);
+ if (!rx_busy) {
+ err = -ENOMEM;
+ goto done;
+ }
+ rx_busy->skb = skb;
+ list_add_tail(&rx_busy->list, &pi->rx_busy);
l2cap_chan_busy(chan, 1);
err = 0;
}
@@ -1706,6 +1762,8 @@ static const struct l2cap_ops l2cap_chan_ops = {
static void l2cap_sock_destruct(struct sock *sk)
{
+ struct l2cap_rx_busy *rx_busy, *next;
+
BT_DBG("sk %p", sk);
if (l2cap_pi(sk)->chan) {
@@ -1713,9 +1771,10 @@ static void l2cap_sock_destruct(struct sock *sk)
l2cap_chan_put(l2cap_pi(sk)->chan);
}
- if (l2cap_pi(sk)->rx_busy_skb) {
- kfree_skb(l2cap_pi(sk)->rx_busy_skb);
- l2cap_pi(sk)->rx_busy_skb = NULL;
+ list_for_each_entry_safe(rx_busy, next, &l2cap_pi(sk)->rx_busy, list) {
+ kfree_skb(rx_busy->skb);
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
skb_queue_purge(&sk->sk_receive_queue);
@@ -1799,6 +1858,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
chan->data = sk;
chan->ops = &l2cap_chan_ops;
+
+ l2cap_publish_rx_avail(chan);
}
static struct proto l2cap_proto = {
@@ -1820,6 +1881,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
+ INIT_LIST_HEAD(&l2cap_pi(sk)->rx_busy);
+
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index b8e05ddeed..80f220b7e1 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -443,8 +443,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -468,8 +467,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -503,8 +501,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -528,8 +525,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -561,10 +557,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
read_lock(&hci_dev_list_lock);
count = 0;
- list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP)
- count++;
- }
+ list_for_each_entry(d, &hci_dev_list, list)
+ count++;
rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC);
if (!rp) {
@@ -585,16 +579,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY) {
- if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
- rp->entry[count].type = 0x01;
- else
- rp->entry[count].type = 0x00;
- } else if (d->dev_type == HCI_AMP) {
- rp->entry[count].type = 0x02;
- } else {
- continue;
- }
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ rp->entry[count].type = 0x01;
+ else
+ rp->entry[count].type = 0x00;
rp->entry[count].bus = d->bus;
rp->entry[count++].index = cpu_to_le16(d->id);
@@ -1385,6 +1373,14 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ if (!cp->val) {
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED,
+ MGMT_STATUS_BUSY);
+ goto failed;
+ }
+ }
+
if (pending_find(MGMT_OP_SET_POWERED, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED,
MGMT_STATUS_BUSY);
@@ -1699,8 +1695,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- if (cmd)
- mgmt_pending_remove(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -8770,8 +8765,7 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
}
unlock:
- if (cmd)
- mgmt_pending_free(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
}
@@ -9325,23 +9319,14 @@ void mgmt_index_added(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
@@ -9358,25 +9343,16 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
@@ -9692,6 +9668,9 @@ bool mgmt_powering_down(struct hci_dev *hdev)
struct mgmt_pending_cmd *cmd;
struct mgmt_mode *cp;
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN))
+ return true;
+
cmd = pending_find(MGMT_OP_SET_POWERED, hdev);
if (!cmd)
return false;
@@ -9999,6 +9978,9 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
/* If this is a HCI command related to powering on the
* HCI dev don't send any mgmt signals.
*/
+ if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN))
+ return;
+
if (pending_find(MGMT_OP_SET_POWERED, hdev))
return;
}
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 9612c5d1b1..d039683d3b 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -769,7 +769,7 @@ void msft_register(struct hci_dev *hdev)
mutex_init(&msft->filter_lock);
}
-void msft_unregister(struct hci_dev *hdev)
+void msft_release(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 2a63205b37..fe538e9c91 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -14,7 +14,7 @@
bool msft_monitor_supported(struct hci_dev *hdev);
void msft_register(struct hci_dev *hdev);
-void msft_unregister(struct hci_dev *hdev);
+void msft_release(struct hci_dev *hdev);
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb);
@@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev)
}
static inline void msft_register(struct hci_dev *hdev) {}
-static inline void msft_unregister(struct hci_dev *hdev) {}
+static inline void msft_release(struct hci_dev *hdev) {}
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 29aa07e9db..37d63d768a 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -468,8 +468,8 @@ done:
return err;
}
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 5d03c5440b..a5ac160c59 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work)
struct sock *sk;
sco_conn_lock(conn);
+ if (!conn->hcon) {
+ sco_conn_unlock(conn);
+ return;
+ }
sk = conn->sk;
if (sk)
sock_hold(sk);
@@ -122,7 +126,6 @@ static void sco_sock_clear_timer(struct sock *sk)
/* ---- SCO connections ---- */
static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
{
- struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
if (conn) {
@@ -140,9 +143,10 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
hcon->sco_data = conn;
conn->hcon = hcon;
+ conn->mtu = hcon->mtu;
- if (hdev->sco_mtu > 0)
- conn->mtu = hdev->sco_mtu;
+ if (hcon->mtu > 0)
+ conn->mtu = hcon->mtu;
else
conn->mtu = 60;
@@ -643,7 +647,7 @@ done:
}
static int sco_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -652,7 +656,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 8906f7bdf4..891cdf61c6 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -7,7 +7,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
-extern struct bpf_struct_ops bpf_bpf_dummy_ops;
+static struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
@@ -22,6 +22,8 @@ struct bpf_dummy_ops_test_args {
struct bpf_dummy_ops_state state;
};
+static struct btf *bpf_dummy_ops_btf;
+
static struct bpf_dummy_ops_test_args *
dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr)
{
@@ -77,6 +79,51 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+static const struct bpf_ctx_arg_aux *find_ctx_arg_info(struct bpf_prog_aux *aux, int offset)
+{ /* Linear search of aux->ctx_arg_info for the entry whose offset equals @offset. */
+ int i;
+
+ for (i = 0; i < aux->ctx_arg_info_size; i++)
+ if (aux->ctx_arg_info[i].offset == offset)
+ return &aux->ctx_arg_info[i];
+ /* No entry is recorded for this offset. */
+ return NULL;
+}
+
+/* Validate @args for @prog; returns 0 if acceptable, -EINVAL otherwise. Only one check so far:
+ * - zero should not be passed for pointer parameters not marked as nullable.
+ */
+static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_args *args)
+{
+ const struct btf_type *func_proto = prog->aux->attach_func_proto;
+
+ for (u32 arg_no = 0; arg_no < btf_type_vlen(func_proto) ; ++arg_no) {
+ const struct btf_param *param = &btf_params(func_proto)[arg_no];
+ const struct bpf_ctx_arg_aux *info;
+ const struct btf_type *t;
+ int offset;
+ /* Non-zero arguments need no further checking. */
+ if (args->args[arg_no] != 0)
+ continue;
+
+ /* Program is validated already, so there is no need
+ * to check if t is NULL.
+ */
+ t = btf_type_skip_modifiers(bpf_dummy_ops_btf, param->type, NULL);
+ if (!btf_type_is_ptr(t))
+ continue;
+ /* A zero pointer is accepted only if its ctx arg info carries PTR_MAYBE_NULL. */
+ offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no);
+ info = find_ctx_arg_info(prog->aux, offset);
+ if (info && (info->reg_type & PTR_MAYBE_NULL))
+ continue;
+ /* Zero was passed for a pointer parameter that is not nullable. */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
extern const struct bpf_link_ops bpf_struct_ops_link_lops;
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -85,14 +132,21 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_links *tlinks = NULL;
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
+ u32 image_off = 0;
int prog_ret;
+ s32 type_id;
int err;
- if (prog->aux->attach_btf_id != st_ops->type_id)
+ type_id = btf_find_by_name_kind(bpf_dummy_ops_btf,
+ bpf_bpf_dummy_ops.name,
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ if (prog->aux->attach_btf_id != type_id)
return -EOPNOTSUPP;
func_proto = prog->aux->attach_func_proto;
@@ -100,14 +154,12 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
- tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
- if (!tlinks) {
- err = -ENOMEM;
+ err = check_test_run_args(prog, args);
+ if (err)
goto out;
- }
- image = arch_alloc_bpf_trampoline(PAGE_SIZE);
- if (!image) {
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks) {
err = -ENOMEM;
goto out;
}
@@ -125,11 +177,14 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
&dummy_ops_test_ret_function,
- image, image + PAGE_SIZE);
+ &image, &image_off,
+ true);
if (err < 0)
goto out;
- arch_protect_bpf_trampoline(image, PAGE_SIZE);
+ err = arch_protect_bpf_trampoline(image, PAGE_SIZE);
+ if (err)
+ goto out;
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
@@ -139,7 +194,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- arch_free_bpf_trampoline(image, PAGE_SIZE);
+ bpf_struct_ops_image_free(image);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
@@ -148,6 +203,7 @@ out:
static int bpf_dummy_init(struct btf *btf)
{
+ bpf_dummy_ops_btf = btf;
return 0;
}
@@ -169,7 +225,7 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t,
case offsetof(struct bpf_dummy_ops, test_sleepable):
break;
default:
- if (prog->aux->sleepable)
+ if (prog->sleepable)
return -EINVAL;
}
@@ -225,7 +281,7 @@ static void bpf_dummy_unreg(void *kdata)
{
}
-static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+static int bpf_dummy_ops__test_1(struct bpf_dummy_ops_state *cb__nullable)
{
return 0;
}
@@ -242,12 +298,12 @@ static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
}
static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
- .test_1 = bpf_dummy_test_1,
+ .test_1 = bpf_dummy_ops__test_1,
.test_2 = bpf_dummy_test_2,
.test_sleepable = bpf_dummy_test_sleepable,
};
-struct bpf_struct_ops bpf_bpf_dummy_ops = {
+static struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
.check_member = bpf_dummy_ops_check_member,
@@ -256,4 +312,11 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
.cfi_stubs = &__bpf_bpf_dummy_ops,
+ .owner = THIS_MODULE,
};
+/* Initcall: registers bpf_bpf_dummy_ops and returns register_bpf_struct_ops()'s result. */
+static int __init bpf_dummy_struct_ops_init(void)
+{
+ return register_bpf_struct_ops(&bpf_bpf_dummy_ops, bpf_dummy_ops);
+}
+late_initcall(bpf_dummy_struct_ops_init);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index dfd9193740..36ae54f57b 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,6 +12,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
+#include <net/hotdata.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
@@ -254,7 +255,8 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
int i, n;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs);
+ n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes,
+ (void **)skbs);
if (unlikely(n == 0)) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
@@ -573,6 +575,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
return a + *b + c + d + (long)e + f + g;
}
+__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
+{ /* Calls trace_bpf_trigger_tp() with @nonce, then returns @nonce unchanged. */
+ trace_bpf_trigger_tp(nonce);
+
+ return nonce;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -617,21 +626,22 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor);
__bpf_kfunc_end_defs();
-BTF_SET8_START(bpf_test_modify_return_ids)
+BTF_KFUNCS_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
BTF_ID_FLAGS(func, bpf_modify_return_test2)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
-BTF_SET8_END(bpf_test_modify_return_ids)
+BTF_KFUNCS_END(bpf_test_modify_return_ids)
static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
.owner = THIS_MODULE,
.set = &bpf_test_modify_return_ids,
};
-BTF_SET8_START(test_sk_check_kfunc_ids)
+BTF_KFUNCS_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
-BTF_SET8_END(test_sk_check_kfunc_ids)
+BTF_KFUNCS_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
u32 size, u32 headroom, u32 tailroom)
@@ -717,10 +727,16 @@ static void
__bpf_prog_test_run_raw_tp(void *data)
{
struct bpf_raw_tp_test_run_info *info = data;
+ struct bpf_trace_run_ctx run_ctx = {};
+ struct bpf_run_ctx *old_run_ctx;
+
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
rcu_read_lock();
info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
+
+ bpf_reset_run_ctx(old_run_ctx);
}
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ac19b797db..2cab878e0a 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit_batch(struct list_head *net_list)
+static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net_device *dev;
struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
for_each_netdev(net, dev)
if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
-
- unregister_netdevice_many(&list);
-
- rtnl_unlock();
+ br_dev_delete(dev, dev_to_kill);
}
static struct pernet_operations br_net_ops = {
- .exit_batch = br_net_exit_batch,
+ .exit_batch_rtnl = br_net_exit_batch_rtnl,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 65cee0ad3c..fb1115857e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,6 +27,7 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ enum skb_drop_reason reason = pskb_may_pull_reason(skb, ETH_HLEN);
struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mcast *brmctx = &br->multicast_ctx;
@@ -38,6 +39,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest;
u16 vid = 0;
+ if (unlikely(reason != SKB_NOT_DROPPED_YET)) {
+ kfree_skb_reason(skb, reason);
+ return NETDEV_TX_OK;
+ }
+
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
br_tc_skb_miss_set(skb, false);
@@ -108,38 +114,23 @@ out:
return NETDEV_TX_OK;
}
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
-static void br_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = br_fdb_hash_init(br);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = br_mdb_hash_init(br);
if (err) {
- free_percpu(dev->tstats);
br_fdb_hash_fini(br);
return err;
}
err = br_vlan_init(br);
if (err) {
- free_percpu(dev->tstats);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
return err;
@@ -147,14 +138,14 @@ static int br_dev_init(struct net_device *dev)
err = br_multicast_init_stats(br);
if (err) {
- free_percpu(dev->tstats);
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
+ return err;
}
- br_set_lockdep_class(dev);
- return err;
+ netdev_lockdep_set_classes(dev);
+ return 0;
}
static void br_dev_uninit(struct net_device *dev)
@@ -166,7 +157,6 @@ static void br_dev_uninit(struct net_device *dev)
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
- free_percpu(dev->tstats);
}
static int br_dev_open(struct net_device *dev)
@@ -213,7 +203,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
/* this flag will be cleared if the MTU was automatically adjusted */
br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
@@ -405,7 +395,7 @@ static int br_fill_forward_path(struct net_device_path_ctx *ctx,
br_vlan_fill_forward_path_pvid(br, ctx, path);
f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
- if (!f || !f->dst)
+ if (!f)
return -1;
dst = READ_ONCE(f->dst);
@@ -481,7 +471,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fill_forward_path = br_fill_forward_path,
};
-static struct device_type br_type = {
+static const struct device_type br_type = {
.name = "bridge",
};
@@ -503,6 +493,7 @@ void br_dev_setup(struct net_device *dev)
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
br->dev = dev;
spin_lock_init(&br->lock);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c622de5ecc..c77591e638 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -35,10 +35,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly;
int __init br_fdb_init(void)
{
- br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
- sizeof(struct net_bridge_fdb_entry),
- 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ br_fdb_cache = KMEM_CACHE(net_bridge_fdb_entry, SLAB_HWCACHE_ALIGN);
if (!br_fdb_cache)
return -ENOMEM;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7431f89e89..e19b583ff2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -25,8 +25,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) &&
- nbp_switchdev_allowed_egress(p, skb) &&
+ (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) &&
+ br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) &&
!br_skb_isolated(p, skb);
}
@@ -258,6 +258,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
const unsigned char *src = eth_hdr(skb)->h_source;
+ struct sk_buff *nskb;
if (!should_deliver(p, skb))
return;
@@ -266,12 +267,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
if (skb->dev == p->dev && ether_addr_equal(src, addr))
return;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb) {
+ __skb_push(skb, ETH_HLEN);
+ nskb = pskb_copy(skb, GFP_ATOMIC);
+ __skb_pull(skb, ETH_HLEN);
+ if (!nskb) {
DEV_STATS_INC(dev, tx_dropped);
return;
}
+ skb = nskb;
+ __skb_pull(skb, ETH_HLEN);
if (!is_broadcast_ether_addr(addr))
memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN);
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
index ee680adcee..1820f09ff5 100644
--- a/net/bridge/br_mst.c
+++ b/net/bridge/br_mst.c
@@ -73,12 +73,11 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state)
}
EXPORT_SYMBOL_GPL(br_mst_get_state);
-static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v,
+static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
u8 state)
{
- struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
-
- if (v->state == state)
+ if (br_vlan_get_state(v) == state)
return;
br_vlan_set_state(v, state);
@@ -100,11 +99,12 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
};
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
- int err;
+ int err = 0;
- vg = nbp_vlan_group(p);
+ rcu_read_lock();
+ vg = nbp_vlan_group_rcu(p);
if (!vg)
- return 0;
+ goto out;
/* MSTI 0 (CST) state changes are notified via the regular
* SWITCHDEV_ATTR_ID_PORT_STP_STATE.
@@ -112,17 +112,20 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
if (msti) {
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err && err != -EOPNOTSUPP)
- return err;
+ goto out;
}
- list_for_each_entry(v, &vg->vlan_list, vlist) {
+ err = 0;
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
if (v->brvlan->msti != msti)
continue;
- br_mst_vlan_set_state(p, v, state);
+ br_mst_vlan_set_state(vg, v, state);
}
- return 0;
+out:
+ rcu_read_unlock();
+ return err;
}
static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
@@ -136,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
* it.
*/
if (v != pv && v->brvlan->msti == msti) {
- br_mst_vlan_set_state(pv->port, pv, v->state);
+ br_mst_vlan_set_state(vg, pv, v->state);
return;
}
}
/* Otherwise, start out in a new MSTI with all ports disabled. */
- return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED);
+ return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED);
}
int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2d7b732429..b2ae0d2434 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
- HLIST_HEAD(deleted_head);
struct hlist_node *n;
/* Take care of the remaining groups, only perm ones should be left */
spin_lock_bh(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
br_multicast_find_del_pg(br, pg);
- hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
- br_multicast_gc(&deleted_head);
+ flush_work(&br->mcast_gc_work);
br_multicast_port_ctx_deinit(&port->multicast_ctx);
free_percpu(port->mcast_stats);
}
@@ -5053,7 +5051,7 @@ void br_multicast_uninit_stats(struct net_bridge *br)
free_percpu(br->mcast_stats);
}
-/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */
+/* noinline for https://llvm.org/pr45802#c9 */
static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src)
{
dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 22e35623c1..bf30c50b56 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -399,7 +399,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0);
+ RT_TOS(iph->tos), 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
@@ -1225,7 +1226,6 @@ static struct ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
- { }
};
static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
@@ -1274,7 +1274,7 @@ static int br_netfilter_sysctl_init_net(struct net *net)
static void br_netfilter_sysctl_exit_net(struct net *net,
struct brnf_net *brnet)
{
- struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
+ const struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(brnet->ctl_hdr);
if (!net_eq(net, &init_net))
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d415833fce..f17dbac7d8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
u32 filter_mask, const struct net_device *dev,
bool getlink)
{
- u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+ u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN;
struct nlattr *af = NULL;
struct net_bridge *br;
struct ifinfomsg *hdr;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 15f44d026e..9c2fffb827 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -841,7 +841,7 @@ void br_vlan_flush(struct net_bridge *br)
vg = br_vlan_group(br);
__vlan_flush(br, NULL, vg);
RCU_INIT_POINTER(br->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
@@ -1372,7 +1372,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
vg = nbp_vlan_group(port);
__vlan_flush(port->br, port, vg);
RCU_INIT_POINTER(port->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 81833ca7a2..a966a6ec82 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -65,13 +65,14 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
{
struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
int err;
if (metadata)
return -EEXIST;
- metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
- key, 0);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, key, 0);
if (!metadata)
return -EINVAL;
@@ -185,6 +186,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan)
{
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tunnel_dst;
__be64 tunnel_id;
int err;
@@ -202,7 +204,8 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
return err;
if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
- tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, flags,
tunnel_id, 0);
if (!tunnel_dst)
return -ENOMEM;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 7f304a19ac..104c0125e3 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -39,6 +39,10 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
+# old sockopt interface and eval loop
+config BRIDGE_NF_EBTABLES_LEGACY
+ tristate
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
@@ -55,6 +59,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -65,6 +70,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -74,6 +80,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 1c9ce49ab6..b9a1303da9 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
+obj-$(CONFIG_BRIDGE_NF_EBTABLES_LEGACY) += ebtables.o
# tables
obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 8480684f27..20139fa1be 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -201,14 +201,14 @@ int cfctrl_linkup_request(struct cflayer *layer,
struct cflayer *user_layer)
{
struct cfctrl *cfctrl = container_obj(layer);
+ struct cflayer *dn = cfctrl->serv.layer.dn;
+ char utility_name[UTILITY_NAME_LENGTH];
+ struct cfctrl_request_info *req;
+ struct cfpkt *pkt;
u32 tmp32;
u16 tmp16;
u8 tmp8;
- struct cfctrl_request_info *req;
int ret;
- char utility_name[16];
- struct cfpkt *pkt;
- struct cflayer *dn = cfctrl->serv.layer.dn;
if (!dn) {
pr_debug("not able to send linkup request\n");
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7343fd487d..707576eeeb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -865,6 +865,8 @@ static __init int can_init(void)
/* check for correct padding to be able to use the structs similarly */
BUILD_BUG_ON(offsetof(struct can_frame, len) !=
offsetof(struct canfd_frame, len) ||
+ offsetof(struct can_frame, len) !=
+ offsetof(struct canxl_frame, flags) ||
offsetof(struct can_frame, data) !=
offsetof(struct canfd_frame, data));
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9168114fc8..27d5fcf0ea 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -72,9 +72,11 @@
#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60)
/* use of last_frames[index].flags */
+#define RX_LOCAL 0x10 /* frame was created on the local host */
+#define RX_OWN 0x20 /* frame was sent via the socket it was received on */
#define RX_RECV 0x40 /* received data for this element */
#define RX_THR 0x80 /* element not been sent due to throttle feature */
-#define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */
+#define BCM_CAN_FLAGS_MASK 0x0F /* to clean private flags after usage */
/* get best masking value for can_rx_register() for a given single can_id */
#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \
@@ -138,6 +140,16 @@ static LIST_HEAD(bcm_notifier_list);
static DEFINE_SPINLOCK(bcm_notifier_lock);
static struct bcm_sock *bcm_busy_notifier;
+/* Return pointer to store the extra msg flags for bcm_recvmsg().
+ * We use the space of one unsigned int beyond the 'struct sockaddr_can'
+ * in skb->cb.
+ */
+static inline unsigned int *bcm_flags(struct sk_buff *skb)
+{
+ /* return pointer after struct sockaddr_can */
+ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
+}
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -325,6 +337,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
struct sock *sk = op->sk;
unsigned int datalen = head->nframes * op->cfsiz;
int err;
+ unsigned int *pflags;
skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
if (!skb)
@@ -332,6 +345,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
skb_put_data(skb, head, sizeof(*head));
+ /* ensure space for sockaddr_can and msg flags */
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+ sizeof(unsigned int));
+
+ /* initialize msg flags */
+ pflags = bcm_flags(skb);
+ *pflags = 0;
+
if (head->nframes) {
/* CAN frames starting here */
firstframe = (struct canfd_frame *)skb_tail_pointer(skb);
@@ -344,8 +365,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* relevant for updates that are generated by the
* BCM, where nframes is 1
*/
- if (head->nframes == 1)
+ if (head->nframes == 1) {
+ if (firstframe->flags & RX_LOCAL)
+ *pflags |= MSG_DONTROUTE;
+ if (firstframe->flags & RX_OWN)
+ *pflags |= MSG_CONFIRM;
+
firstframe->flags &= BCM_CAN_FLAGS_MASK;
+ }
}
if (has_timestamp) {
@@ -360,7 +387,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/
- sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
@@ -444,7 +470,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
op->frames_filtered = op->frames_abs = 0;
/* this element is not throttled anymore */
- data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
+ data->flags &= ~RX_THR;
memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
@@ -465,13 +491,17 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
*/
static void bcm_rx_update_and_send(struct bcm_op *op,
struct canfd_frame *lastdata,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
memcpy(lastdata, rxdata, op->cfsiz);
/* mark as used and throttled by default */
lastdata->flags |= (RX_RECV|RX_THR);
+ /* add own/local/remote traffic flags */
+ lastdata->flags |= traffic_flags;
+
/* throttling mode inactive ? */
if (!op->kt_ival2) {
/* send RX_CHANGED to the user immediately */
@@ -508,7 +538,8 @@ rx_changed_settime:
* received data stored in op->last_frames[]
*/
static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
struct canfd_frame *cf = op->frames + op->cfsiz * index;
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
@@ -521,7 +552,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (!(lcf->flags & RX_RECV)) {
/* received data for the first time => send update to user */
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
@@ -529,7 +560,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
for (i = 0; i < rxdata->len; i += 8) {
if ((get_u64(cf, i) & get_u64(rxdata, i)) !=
(get_u64(cf, i) & get_u64(lcf, i))) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -537,7 +568,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (op->flags & RX_CHECK_DLC) {
/* do a real check in CAN frame length */
if (rxdata->len != lcf->len) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -644,6 +675,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
struct bcm_op *op = (struct bcm_op *)data;
const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data;
unsigned int i;
+ unsigned char traffic_flags;
if (op->can_id != rxframe->can_id)
return;
@@ -673,15 +705,24 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
return;
}
+ /* compute flags to distinguish between own/local/remote CAN traffic */
+ traffic_flags = 0;
+ if (skb->sk) {
+ traffic_flags |= RX_LOCAL;
+ if (skb->sk == op->sk)
+ traffic_flags |= RX_OWN;
+ }
+
if (op->flags & RX_FILTER_ID) {
/* the easiest case */
- bcm_rx_update_and_send(op, op->last_frames, rxframe);
+ bcm_rx_update_and_send(op, op->last_frames, rxframe,
+ traffic_flags);
goto rx_starttimer;
}
if (op->nframes == 1) {
/* simple compare with index 0 */
- bcm_rx_cmp_to_index(op, 0, rxframe);
+ bcm_rx_cmp_to_index(op, 0, rxframe, traffic_flags);
goto rx_starttimer;
}
@@ -698,7 +739,8 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) ==
(get_u64(op->frames, 0) &
get_u64(op->frames + op->cfsiz * i, 0))) {
- bcm_rx_cmp_to_index(op, i, rxframe);
+ bcm_rx_cmp_to_index(op, i, rxframe,
+ traffic_flags);
break;
}
}
@@ -1675,6 +1717,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* assign the flags that have been recorded in bcm_send_to_user() */
+ msg->msg_flags |= *(bcm_flags(skb));
+
skb_free_datagram(sk, skb);
return size;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d1c6f206f4..25bac0fafc 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -381,8 +381,9 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
return 1;
}
- /* get communication parameters only from the first FC frame */
- if (so->tx.state == ISOTP_WAIT_FIRST_FC) {
+ /* get static/dynamic communication params from first/every FC frame */
+ if (so->tx.state == ISOTP_WAIT_FIRST_FC ||
+ so->opt.flags & CAN_ISOTP_DYN_FC_PARMS) {
so->txfc.bs = cf->data[ae + 1];
so->txfc.stmin = cf->data[ae + 2];
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index a6fb89fa62..7e8a20f2fc 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -30,10 +30,6 @@ MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
/* CAN_HDR: #bytes before can_frame data part */
#define J1939_CAN_HDR (offsetof(struct can_frame, data))
-/* CAN_FTR: #bytes beyond data part */
-#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
- sizeof(((struct can_frame *)0)->data))
-
/* lowest layer */
static void j1939_can_recv(struct sk_buff *iskb, void *data)
{
@@ -342,7 +338,7 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
memset(cf, 0, J1939_CAN_HDR);
/* make it a full can frame again */
- skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+ skb_put_zero(skb, 8 - dlc);
canid = CAN_EFF_FLAG |
(skcb->priority << 26) |
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fe3df23a25..4be73de503 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1593,8 +1593,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
struct j1939_session *session;
const u8 *dat;
+ int len, ret;
pgn_t pgn;
- int len;
netdev_dbg(priv->ndev, "%s\n", __func__);
@@ -1653,7 +1653,22 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
session->tskey = priv->rx_tskey++;
j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
- WARN_ON_ONCE(j1939_session_activate(session));
+ ret = j1939_session_activate(session);
+ if (ret) {
+ /* Entering this scope indicates an issue with the J1939 bus.
+ * Possible scenarios include:
+ * - A time lapse occurred, and a new session was initiated
+ * due to another packet being sent correctly. This could
+ * have been caused by too long interrupt, debugger, or being
+ * out-scheduled by another task.
+ * - The bus is receiving numerous erroneous packets, either
+ * from a malfunctioning device or during a test scenario.
+ */
+ netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n",
+ __func__, session, skcb.addr.sa, skcb.addr.da);
+ j1939_session_put(session);
+ return NULL;
+ }
return session;
}
@@ -1681,6 +1696,8 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
j1939_session_timers_cancel(session);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+ if (session->transmission)
+ j1939_session_deactivate_activate_next(session);
return -EBUSY;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index e6b822624b..00533f64d6 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -91,6 +91,10 @@ struct raw_sock {
int recv_own_msgs;
int fd_frames;
int xl_frames;
+ struct can_raw_vcid_options raw_vcid_opts;
+ canid_t tx_vcid_shifted;
+ canid_t rx_vcid_shifted;
+ canid_t rx_vcid_mask_shifted;
int join_filters;
int count; /* number of active filters */
struct can_filter dfilter; /* default/single filter */
@@ -134,10 +138,29 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
- (!ro->xl_frames && can_is_canxl_skb(oskb)))
+ if (!ro->fd_frames && can_is_canfd_skb(oskb))
return;
+ if (can_is_canxl_skb(oskb)) {
+ struct canxl_frame *cxl = (struct canxl_frame *)oskb->data;
+
+ /* make sure to not pass oversized frames to the socket */
+ if (!ro->xl_frames)
+ return;
+
+ /* filter CAN XL VCID content */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_RX_FILTER) {
+ /* apply VCID filter if user enabled the filter */
+ if ((cxl->prio & ro->rx_vcid_mask_shifted) !=
+ (ro->rx_vcid_shifted & ro->rx_vcid_mask_shifted))
+ return;
+ } else {
+ /* no filter => do not forward VCID tagged frames */
+ if (cxl->prio & CANXL_VCID_MASK)
+ return;
+ }
+ }
+
/* eliminate multiple filter matches for the same skb */
if (this_cpu_ptr(ro->uniq)->skb == oskb &&
this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
@@ -698,6 +721,19 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->fd_frames = ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS:
+ if (optlen != sizeof(ro->raw_vcid_opts))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&ro->raw_vcid_opts, optval, optlen))
+ return -EFAULT;
+
+ /* prepare 32 bit values for handling in hot path */
+ ro->tx_vcid_shifted = ro->raw_vcid_opts.tx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_shifted = ro->raw_vcid_opts.rx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_mask_shifted = ro->raw_vcid_opts.rx_vcid_mask << CANXL_VCID_OFFSET;
+ break;
+
case CAN_RAW_JOIN_FILTERS:
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
@@ -720,7 +756,6 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
struct raw_sock *ro = raw_sk(sk);
int len;
void *val;
- int err = 0;
if (level != SOL_CAN_RAW)
return -EINVAL;
@@ -730,7 +765,9 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
switch (optname) {
- case CAN_RAW_FILTER:
+ case CAN_RAW_FILTER: {
+ int err = 0;
+
lock_sock(sk);
if (ro->count > 0) {
int fsize = ro->count * sizeof(struct can_filter);
@@ -755,7 +792,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (!err)
err = put_user(len, optlen);
return err;
-
+ }
case CAN_RAW_ERR_FILTER:
if (len > sizeof(can_err_mask_t))
len = sizeof(can_err_mask_t);
@@ -786,6 +823,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
val = &ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS: {
+ int err = 0;
+
+ /* user space buffer to small for VCID opts? */
+ if (len < sizeof(ro->raw_vcid_opts)) {
+ /* return -ERANGE and needed space in optlen */
+ err = -ERANGE;
+ if (put_user(sizeof(ro->raw_vcid_opts), optlen))
+ err = -EFAULT;
+ } else {
+ if (len > sizeof(ro->raw_vcid_opts))
+ len = sizeof(ro->raw_vcid_opts);
+ if (copy_to_user(optval, &ro->raw_vcid_opts, len))
+ err = -EFAULT;
+ }
+ if (!err)
+ err = put_user(len, optlen);
+ return err;
+ }
case CAN_RAW_JOIN_FILTERS:
if (len > sizeof(int))
len = sizeof(int);
@@ -803,23 +859,41 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb)
+{
+ struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
+
+ /* sanitize non CAN XL bits */
+ cxl->prio &= (CANXL_PRIO_MASK | CANXL_VCID_MASK);
+
+ /* clear VCID in CAN XL frame if pass through is disabled */
+ if (!(ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_PASS))
+ cxl->prio &= CANXL_PRIO_MASK;
+
+ /* set VCID in CAN XL frame if enabled */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_SET) {
+ cxl->prio &= CANXL_PRIO_MASK;
+ cxl->prio |= ro->tx_vcid_shifted;
+ }
+}
+
+static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
{
/* Classical CAN -> no checks for flags and device capabilities */
if (can_is_can_skb(skb))
- return false;
+ return CAN_MTU;
/* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
if (ro->fd_frames && can_is_canfd_skb(skb) &&
(mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
- return false;
+ return CANFD_MTU;
/* CAN XL -> needs to be enabled and a CAN XL device */
if (ro->xl_frames && can_is_canxl_skb(skb) &&
can_is_canxl_dev_mtu(mtu))
- return false;
+ return CANXL_MTU;
- return true;
+ return 0;
}
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -829,6 +903,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
+ unsigned int txmtu;
int ifindex;
int err = -EINVAL;
@@ -869,9 +944,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (raw_bad_txframe(ro, skb, dev->mtu))
+
+ /* check for valid CAN (CC/FD/XL) frame content */
+ txmtu = raw_check_txframe(ro, skb, dev->mtu);
+ if (!txmtu)
goto free_skb;
+ /* only CANXL: clear/forward/set VCID value */
+ if (txmtu == CANXL_MTU)
+ raw_put_canxl_vcid(ro, skb);
+
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 1daf95e17d..3a5bd1cd1e 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -429,7 +429,10 @@ static int is_out(const struct crush_map *map,
/**
* crush_choose_firstn - choose numrep distinct items of given type
* @map: the crush_map
+ * @work: working space initialized by crush_init_workspace()
* @bucket: the bucket we are choose an item from
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
* @x: crush input value
* @numrep: the number of items to choose
* @type: the type of item to choose
@@ -445,6 +448,7 @@ static int is_out(const struct crush_map *map,
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
+ * @choose_args: weights and ids for each known bucket
*/
static int crush_choose_firstn(const struct crush_map *map,
struct crush_work *work,
@@ -636,9 +640,8 @@ reject:
}
-/**
+/*
* crush_choose_indep: alternative breadth-first positionally stable mapping
- *
*/
static void crush_choose_indep(const struct crush_map *map,
struct crush_work *work,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index f263f7e91a..ab66b599ac 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
struct ceph_mon_client *monc =
container_of(work, struct ceph_mon_client, delayed_work.work);
- dout("monc delayed_work\n");
mutex_lock(&monc->mutex);
+ dout("%s mon%d\n", __func__, monc->cur_mon);
+ if (monc->cur_mon < 0) {
+ goto out;
+ }
+
if (monc->hunting) {
dout("%s continuing hunt\n", __func__);
reopen_session(monc);
} else {
int is_auth = ceph_auth_is_authenticated(monc->auth);
+
+ dout("%s is_authed %d\n", __func__, is_auth);
if (ceph_con_keepalive_expired(&monc->con,
CEPH_MONC_PING_TIMEOUT)) {
dout("monc keepalive timeout\n");
@@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
}
}
__schedule_delayed(monc);
+
+out:
mutex_unlock(&monc->mutex);
}
@@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
void ceph_monc_stop(struct ceph_mon_client *monc)
{
dout("stop\n");
- cancel_delayed_work_sync(&monc->delayed_work);
mutex_lock(&monc->mutex);
__close_session(monc);
+ monc->hunting = false;
monc->cur_mon = -1;
mutex_unlock(&monc->mutex);
+ cancel_delayed_work_sync(&monc->delayed_work);
+
/*
* flush msgr queue before we destroy ourselves to ensure that:
* - any work that references our embedded con is finished.
diff --git a/net/core/Makefile b/net/core/Makefile
index 821aec06ab..62be9aef25 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,6 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
+obj-y += hotdata.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
@@ -25,6 +26,7 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+obj-$(CONFIG_NET_IEEE8021Q_HELPERS) += ieee8021q_helpers.o
obj-$(CONFIG_NET_SELFTESTS) += selftests.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
@@ -40,4 +42,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
-obj-$(CONFIG_NET_TEST) += gso_test.o
+obj-$(CONFIG_NET_TEST) += net_test.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 6c4d90b24d..bc01b3aa6b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,27 +496,22 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
if (!bpf_capable())
return ERR_PTR(-EPERM);
- nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
- if (nla_len(nla) != sizeof(u32))
- return ERR_PTR(-EINVAL);
- nr_maps++;
- }
+ nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+ nla_stgs, rem) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
+ nr_maps++;
}
diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
if (!diag)
return ERR_PTR(-ENOMEM);
- nla_for_each_nested(nla, nla_stgs, rem) {
- struct bpf_map *map;
- int map_fd;
-
- if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
- continue;
+ nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+ nla_stgs, rem) {
+ int map_fd = nla_get_u32(nla);
+ struct bpf_map *map = bpf_map_get(map_fd);
- map_fd = nla_get_u32(nla);
- map = bpf_map_get(map_fd);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto err_free;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a8b625abe2..e72dd78471 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -324,25 +324,6 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_free_datagram);
-void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
-{
- bool slow;
-
- if (!skb_unref(skb)) {
- sk_peek_offset_bwd(sk, len);
- return;
- }
-
- slow = lock_sock_fast(sk);
- sk_peek_offset_bwd(sk, len);
- skb_orphan(skb);
- unlock_sock_fast(sk, slow);
-
- /* skb is now orphaned, can be freed outside of locked section */
- __kfree_skb(skb);
-}
-EXPORT_SYMBOL(__skb_free_datagram_locked);
-
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
@@ -435,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
if (copy > len)
copy = len;
- n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
- vaddr + skb_frag_off(frag) + offset - start,
- copy, data, to);
- kunmap(page);
+
+ n = 0;
+ skb_frag_foreach_page(frag,
+ skb_frag_off(frag) + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_local_page(p);
+ n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
+ vaddr + p_off, p_len, data, to);
+ kunmap_local(vaddr);
+ }
+
offset += n;
if (n != copy)
goto short_copy;
diff --git a/net/core/dev.c b/net/core/dev.c
index c365aa06f8..2b4819b610 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -77,7 +77,9 @@
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/mm.h>
+#include <linux/smpboot.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/string.h>
@@ -153,41 +155,21 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-struct list_head ptype_all __read_mostly; /* Taps */
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-/*
- * The @dev_base_head list is protected by @dev_base_lock and the rtnl
- * semaphore.
- *
- * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
- *
- * Writers must hold the rtnl semaphore while they loop through the
- * dev_base_head list, and hold dev_base_lock for writing when they do the
- * actual updates. This allows pure readers to access the list even
- * while a writer is preparing to update it.
- *
- * To put it another way, dev_base_lock is held for writing only to
- * protect against pure readers; the rtnl semaphore provides the
- * protection against other writers.
- *
- * See, for example usages, register_netdevice() and
- * unregister_netdevice(), which must be called with the rtnl
- * semaphore held.
- */
-DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_lock);
-
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -200,8 +182,9 @@ static DECLARE_RWSEM(devnet_rename_sem);
static inline void dev_base_seq_inc(struct net *net)
{
- while (++net->dev_base_seq == 0)
- ;
+ unsigned int val = net->dev_base_seq + 1;
+
+ WRITE_ONCE(net->dev_base_seq, val ?: 1); /* 0 is never stored: a wrap to 0 stores 1 instead */
}
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
@@ -216,35 +199,60 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
-static inline void rps_lock_irqsave(struct softnet_data *sd,
- unsigned long *flags)
+#ifndef CONFIG_PREEMPT_RT
+
+static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key); /* switched on by setup_backlog_napi_threads() below */
+
+static int __init setup_backlog_napi_threads(char *arg) /* handler for the "thread_backlog_napi" early parameter; @arg is not used */
+{
+ static_branch_enable(&use_backlog_threads_key);
+ return 0;
+}
+early_param("thread_backlog_napi", setup_backlog_napi_threads);
+
+static bool use_backlog_threads(void) /* reports whether the static key above has been enabled */
+{
+ return static_branch_unlikely(&use_backlog_threads_key);
+}
+
+#else
+
+static bool use_backlog_threads(void) /* CONFIG_PREEMPT_RT build: unconditionally true */
+{
+ return true;
+}
+
+#endif
+
+static inline void backlog_lock_irq_save(struct softnet_data *sd,
+ unsigned long *flags)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) /* RPS or threaded backlog: take the input_pkt_queue lock, saving IRQ state in *flags */
spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_save(*flags);
}
-static inline void rps_lock_irq_disable(struct softnet_data *sd)
+static inline void backlog_lock_irq_disable(struct softnet_data *sd)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) /* RPS or threaded backlog: take the input_pkt_queue lock with IRQs disabled */
spin_lock_irq(&sd->input_pkt_queue.lock);
else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_disable();
}
-static inline void rps_unlock_irq_restore(struct softnet_data *sd,
- unsigned long *flags)
+static inline void backlog_unlock_irq_restore(struct softnet_data *sd,
+ unsigned long *flags)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) /* same condition as backlog_lock_irq_save(): drop the queue lock and restore IRQ state from *flags */
spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_restore(*flags);
}
-static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+static inline void backlog_unlock_irq_enable(struct softnet_data *sd)
{
- if (IS_ENABLED(CONFIG_RPS))
+ if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
spin_unlock_irq(&sd->input_pkt_queue.lock);
else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_enable();
@@ -341,13 +349,22 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return 0;
}
-static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+static void netdev_name_node_alt_free(struct rcu_head *head) /* frees the name node that embeds @head; used as the call_rcu() callback and also called directly by netdev_name_node_alt_flush() */
{
- list_del(&name_node->list);
+ struct netdev_name_node *name_node =
+ container_of(head, struct netdev_name_node, rcu);
+
kfree(name_node->name);
netdev_name_node_free(name_node);
}
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) /* unhash and unlink @name_node, then defer the free to RCU */
+{
+ netdev_name_node_del(name_node);
+ list_del(&name_node->list);
+ call_rcu(&name_node->rcu, netdev_name_node_alt_free); /* replaces the synchronize_rcu() the caller used to do before freeing */
+}
+
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
@@ -362,10 +379,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
- netdev_name_node_del(name_node);
- synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
-
return 0;
}
@@ -373,8 +387,10 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
{
struct netdev_name_node *name_node, *tmp;
- list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
- __netdev_name_node_alt_destroy(name_node);
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
+ list_del(&name_node->list);
+ netdev_name_node_alt_free(&name_node->rcu);
+ }
}
/* Device list insertion */
@@ -385,12 +401,10 @@ static void list_netdevice(struct net_device *dev)
ASSERT_RTNL();
- write_lock(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
- write_unlock(&dev_base_lock);
netdev_for_each_altname(dev, name_node)
netdev_name_node_add(net, name_node);
@@ -404,7 +418,7 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev, bool lock)
+static void unlist_netdevice(struct net_device *dev)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
@@ -417,13 +431,9 @@ static void unlist_netdevice(struct net_device *dev, bool lock)
netdev_name_node_del(name_node);
/* Unlink dev from the device chain */
- if (lock)
- write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- if (lock)
- write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -442,6 +452,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -551,7 +567,7 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return pt->dev ? &pt->dev->ptype_all : &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
else
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
@@ -653,7 +669,7 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
}
EXPORT_SYMBOL(dev_get_iflink);
@@ -738,9 +754,9 @@ EXPORT_SYMBOL_GPL(dev_fill_forward_path);
* @net: the applicable net namespace
* @name: name to find
*
- * Find an interface by name. Must be called under RTNL semaphore
- * or @dev_base_lock. If the name is found a pointer to the device
- * is returned. If the name is not found then %NULL is returned. The
+ * Find an interface by name. Must be called under RTNL semaphore.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned. The
* reference counters are not incremented so the caller must be
* careful with locks.
*/
@@ -821,8 +837,7 @@ EXPORT_SYMBOL(netdev_get_by_name);
* Search for an interface by index. Returns %NULL if the device
* is not found or a pointer to the device. The device has not
* had its reference counter increased so the caller must be careful
- * about locking. The caller must hold either the RTNL semaphore
- * or @dev_base_lock.
+ * about locking. The caller must hold the RTNL semaphore.
*/
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
@@ -924,6 +939,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
}
EXPORT_SYMBOL(dev_get_by_napi_id);
+static DEFINE_SEQLOCK(netdev_rename_lock); /* write side is taken in dev_change_name() */
+
+void netdev_copy_name(struct net_device *dev, char *name) /* copy dev->name into @name; at most IFNAMSIZ bytes are written */
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&netdev_rename_lock);
+ strscpy(name, dev->name, IFNAMSIZ);
+ } while (read_seqretry(&netdev_rename_lock, seq)); /* copy again if a writer held netdev_rename_lock during the copy */
+}
+
/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
@@ -935,7 +962,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
struct net_device *dev;
int ret;
- down_read(&devnet_rename_sem);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
@@ -944,12 +970,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
goto out;
}
- strcpy(name, dev->name);
+ netdev_copy_name(dev, name);
ret = 0;
out:
rcu_read_unlock();
- up_read(&devnet_rename_sem);
return ret;
}
@@ -1201,7 +1226,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
+ write_seqlock_bh(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
+ write_sequnlock_bh(&netdev_rename_lock);
+
if (err < 0) {
up_write(&devnet_rename_sem);
return err;
@@ -1212,13 +1240,13 @@ int dev_change_name(struct net_device *dev, const char *newname)
dev->flags & IFF_UP ? " (while UP)" : "");
old_assign_type = dev->name_assign_type;
- dev->name_assign_type = NET_NAME_RENAMED;
+ WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
up_write(&devnet_rename_sem);
return ret;
}
@@ -1227,15 +1255,11 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
- write_lock(&dev_base_lock);
netdev_name_node_del(dev->name_node);
- write_unlock(&dev_base_lock);
- synchronize_rcu();
+ synchronize_net();
- write_lock(&dev_base_lock);
netdev_name_node_add(net, dev->name_node);
- write_unlock(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
@@ -1245,9 +1269,11 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
+ write_seqlock_bh(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock_bh(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
@@ -2073,6 +2099,11 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
+#ifdef CONFIG_NET_CLS_ACT
+DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
+EXPORT_SYMBOL(tcf_bypass_check_needed_key);
+#endif
+
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
@@ -2242,7 +2273,8 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
*/
bool dev_nit_active(struct net_device *dev)
{
- return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+ return !list_empty(&net_hotdata.ptype_all) ||
+ !list_empty(&dev->ptype_all);
}
EXPORT_SYMBOL_GPL(dev_nit_active);
@@ -2253,10 +2285,9 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
- struct packet_type *ptype;
+ struct list_head *ptype_list = &net_hotdata.ptype_all;
+ struct packet_type *ptype, *pt_prev = NULL;
struct sk_buff *skb2 = NULL;
- struct packet_type *pt_prev = NULL;
- struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
again:
@@ -2302,7 +2333,7 @@ again:
pt_prev = ptype;
}
- if (ptype_list == &ptype_all) {
+ if (ptype_list == &net_hotdata.ptype_all) {
ptype_list = &dev->ptype_all;
goto again;
}
@@ -3933,6 +3964,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
if (!miniq)
return ret;
+ if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
+ if (tcf_block_bypass_sw(miniq->block))
+ return ret;
+ }
+
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
tcf_set_drop_reason(skb, *drop_reason);
@@ -4426,20 +4462,11 @@ EXPORT_SYMBOL(__dev_direct_xmit);
/*************************************************************************
* Receiver routines
*************************************************************************/
+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
-int netdev_max_backlog __read_mostly = 1000;
-EXPORT_SYMBOL(netdev_max_backlog);
-
-int netdev_tstamp_prequeue __read_mostly = 1;
-unsigned int sysctl_skb_defer_max __read_mostly = 64;
-int netdev_budget __read_mostly = 300;
-/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
-unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
-int dev_rx_weight __read_mostly = 64;
-int dev_tx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4458,18 +4485,16 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
thread = READ_ONCE(napi->thread);
if (thread) {
- /* Avoid doing set_bit() if the thread is in
- * INTERRUPTIBLE state, cause napi_thread_wait()
- * makes sure to proceed with napi polling
- * if the thread is explicitly woken from here.
- */
- if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
- set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+ if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
+ goto use_local_napi;
+
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread);
return;
}
}
+use_local_napi:
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -4481,12 +4506,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
#ifdef CONFIG_RPS
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-u32 rps_cpu_mask __read_mostly;
-EXPORT_SYMBOL(rps_cpu_mask);
-
struct static_key_false rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
struct static_key_false rfs_needed __read_mostly;
@@ -4497,12 +4516,13 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
if (next_cpu < nr_cpu_ids) {
+ u32 head;
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *old_rflow;
- u32 flow_id;
u16 rxq_index;
+ u32 flow_id;
int rc;
/* Should we steer this flow to a different hardware queue? */
@@ -4524,16 +4544,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
goto out;
old_rflow = rflow;
rflow = &flow_table->flows[flow_id];
- rflow->filter = rc;
- if (old_rflow->filter == rflow->filter)
- old_rflow->filter = RPS_NO_FILTER;
+ WRITE_ONCE(rflow->filter, rc);
+ if (old_rflow->filter == rc)
+ WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER);
out:
#endif
- rflow->last_qtail =
- per_cpu(softnet_data, next_cpu).input_queue_head;
+ head = READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head);
+ rps_input_queue_tail_save(&rflow->last_qtail, head);
}
- rflow->cpu = next_cpu;
+ WRITE_ONCE(rflow->cpu, next_cpu);
return rflow;
}
@@ -4578,7 +4598,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (!hash)
goto done;
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
if (flow_table && sock_flow_table) {
struct rps_dev_flow *rflow;
u32 next_cpu;
@@ -4588,10 +4608,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
* This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
*/
ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
- if ((ident ^ hash) & ~rps_cpu_mask)
+ if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask)
goto try_rps;
- next_cpu = ident & rps_cpu_mask;
+ next_cpu = ident & net_hotdata.rps_cpu_mask;
/* OK, now we know there is a match,
* we can look at the local (per receive queue) flow table
@@ -4612,7 +4632,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
*/
if (unlikely(tcpu != next_cpu) &&
(tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
- ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+ ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
rflow->last_qtail)) >= 0)) {
tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
@@ -4666,9 +4686,9 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
cpu = READ_ONCE(rflow->cpu);
- if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
- ((int)(per_cpu(softnet_data, cpu).input_queue_head -
- rflow->last_qtail) <
+ if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
+ ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
+ READ_ONCE(rflow->last_qtail)) <
(int)(10 * flow_table->mask)))
expire = false;
}
@@ -4715,6 +4735,11 @@ static void napi_schedule_rps(struct softnet_data *sd)
#ifdef CONFIG_RPS
if (sd != mysd) {
+ if (use_backlog_threads()) {
+ __napi_schedule_irqoff(&sd->backlog);
+ return;
+ }
+
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -4729,6 +4754,23 @@ static void napi_schedule_rps(struct softnet_data *sd)
__napi_schedule_irqoff(&mysd->backlog);
}
+void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu) /* schedule @sd's backlog NAPI when backlog threads are in use, otherwise queue sd->defer_csd to run on @cpu */
+{
+ unsigned long flags;
+
+ if (use_backlog_threads()) {
+ backlog_lock_irq_save(sd, &flags);
+
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) /* non-atomic bit op, done while holding the backlog lock taken above */
+ __napi_schedule_irqoff(&sd->backlog);
+
+ backlog_unlock_irq_restore(sd, &flags);
+
+ } else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { /* only the caller that flips the flag from 0 to 1 issues the async call */
+ smp_call_function_single_async(cpu, &sd->defer_csd);
+ }
+}
+
#ifdef CONFIG_NET_FLOW_LIMIT
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
#endif
@@ -4740,7 +4782,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
+ if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4780,36 +4822,45 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ int max_backlog;
+ u32 tail;
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ reason = SKB_DROP_REASON_DEV_READY;
+ if (!netif_running(skb->dev))
+ goto bad_dev;
+
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
sd = &per_cpu(softnet_data, cpu);
- rps_lock_irqsave(sd, &flags);
- if (!netif_running(skb->dev))
- goto drop;
+ qlen = skb_queue_len_lockless(&sd->input_pkt_queue);
+ max_backlog = READ_ONCE(net_hotdata.max_backlog);
+ if (unlikely(qlen > max_backlog))
+ goto cpu_backlog_drop;
+ backlog_lock_irq_save(sd, &flags);
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
- if (qlen) {
-enqueue:
- __skb_queue_tail(&sd->input_pkt_queue, skb);
- input_queue_tail_incr_save(sd, qtail);
- rps_unlock_irq_restore(sd, &flags);
- return NET_RX_SUCCESS;
+ if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (!qlen) {
+ /* Schedule NAPI for backlog device. We can use
+ * non atomic operation as we own the queue lock.
+ */
+ if (!__test_and_set_bit(NAPI_STATE_SCHED,
+ &sd->backlog.state))
+ napi_schedule_rps(sd);
}
+ __skb_queue_tail(&sd->input_pkt_queue, skb);
+ tail = rps_input_queue_tail_incr(sd);
+ backlog_unlock_irq_restore(sd, &flags);
- /* Schedule NAPI for backlog device
- * We can use non atomic operation since we own the queue lock
- */
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
- napi_schedule_rps(sd);
- goto enqueue;
+ /* save the tail outside of the critical section */
+ rps_input_queue_tail_save(qtail, tail);
+ return NET_RX_SUCCESS;
}
- reason = SKB_DROP_REASON_CPU_BACKLOG;
-drop:
- sd->dropped++;
- rps_unlock_irq_restore(sd, &flags);
+ backlog_unlock_irq_restore(sd, &flags);
+cpu_backlog_drop:
+ atomic_inc(&sd->dropped);
+bad_dev:
dev_core_stats_rx_dropped_inc(skb->dev);
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
@@ -4864,6 +4915,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
orig_data_end = xdp->data_end;
orig_data = xdp->data;
@@ -4893,6 +4950,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
skb->len += off; /* positive on grow, negative on shrink */
}
+ /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+ * (e.g. bpf_xdp_adjust_tail), so we need to update data_len here.
+ */
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
@@ -4926,11 +4991,35 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
return act;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) /* returns 0 on success; netif_receive_generic_xdp() drops the packet on any non-zero return */
+{
+ struct sk_buff *skb = *pskb;
+ int err, hroom, troom;
+
+ if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog)) /* a zero return is treated as success: nothing more to do */
+ return 0;
+
+ /* In case we have to go down the path and also linearize,
+ * then let's do the pskb_expand_head() work just once here.
+ */
+ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); /* NOTE(review): uses the skb read before skb_cow_data_for_xdp() ran; confirm the helper leaves *pskb untouched when it fails */
+ troom = skb->tail + skb->data_len - skb->end;
+ err = pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+ if (err)
+ return err;
+
+ return skb_linearize(skb);
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- u32 act = XDP_DROP;
+ struct sk_buff *skb = *pskb;
+ u32 mac_len, act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4938,41 +5027,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_is_redirected(skb))
return XDP_PASS;
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
+ /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+ * bytes. This is the guarantee that also native XDP provides,
+ * thus we need to do it here as well.
*/
+ mac_len = skb->data - skb_mac_header(skb);
+ __skb_push(skb, mac_len);
+
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
+ if (netif_skb_check_for_xdp(pskb, xdp_prog))
goto do_drop;
}
- act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ __skb_pull(*pskb, mac_len);
+
+ act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+ bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
+ trace_xdp_exception((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
do_drop:
- kfree_skb(skb);
+ kfree_skb(*pskb);
break;
}
@@ -5010,24 +5094,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
- act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+ act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect(skb->dev, skb,
+ err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
&xdp, xdp_prog);
if (err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(skb, xdp_prog);
+ generic_xdp_tx(*pskb, xdp_prog);
break;
}
return XDP_DROP;
@@ -5035,7 +5119,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+ kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -5044,7 +5128,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5336,7 +5420,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5358,7 +5442,8 @@ another_round:
int ret2;
migrate_disable();
- ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ &skb);
migrate_enable();
if (ret2 != XDP_PASS) {
@@ -5379,7 +5464,7 @@ another_round:
if (pfmemalloc)
goto skip_taps;
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -5719,7 +5804,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5749,7 +5834,8 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
+ skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5839,21 +5925,21 @@ static void flush_backlog(struct work_struct *work)
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
dev_kfree_skb_irq(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
local_bh_enable();
@@ -5865,14 +5951,14 @@ static bool flush_required(int cpu)
struct softnet_data *sd = &per_cpu(softnet_data, cpu);
bool do_flush;
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
/* as insertion into process_queue happens with the rps lock held,
* process_queue access may race only with dequeue
*/
do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
!skb_queue_empty_lockless(&sd->process_queue);
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
return do_flush;
#endif
@@ -5938,7 +6024,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
#ifdef CONFIG_RPS
struct softnet_data *remsd = sd->rps_ipi_list;
- if (remsd) {
+ if (!use_backlog_threads() && remsd) {
sd->rps_ipi_list = NULL;
local_irq_enable();
@@ -5953,7 +6039,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- return sd->rps_ipi_list != NULL;
+ return !use_backlog_threads() && sd->rps_ipi_list;
#else
return false;
#endif
@@ -5973,7 +6059,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = READ_ONCE(dev_rx_weight);
+ napi->weight = READ_ONCE(net_hotdata.dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -5981,13 +6067,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
rcu_read_lock();
__netif_receive_skb(skb);
rcu_read_unlock();
- input_queue_head_incr(sd);
- if (++work >= quota)
+ if (++work >= quota) {
+ rps_input_queue_head_add(sd, work);
return work;
+ }
}
- rps_lock_irq_disable(sd);
+ backlog_lock_irq_disable(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
@@ -5997,15 +6084,17 @@ static int process_backlog(struct napi_struct *napi, int quota)
* We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
- napi->state = 0;
+ napi->state &= NAPIF_STATE_THREADED;
again = false;
} else {
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
}
- rps_unlock_irq_enable(sd);
+ backlog_unlock_irq_enable(sd);
}
+ if (work)
+ rps_input_queue_head_add(sd, work);
return work;
}
@@ -6162,6 +6251,27 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
return NULL;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock(&sd->defer_lock);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock(&sd->defer_lock);
+
+ while (skb != NULL) {
+ next = skb->next;
+ napi_consume_skb(skb, 1);
+ skb = next;
+ }
+}
+
#if defined(CONFIG_NET_RX_BUSY_POLL)
static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
@@ -6183,8 +6293,13 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
- u16 budget)
+enum {
+ NAPI_F_PREFER_BUSY_POLL = 1,
+ NAPI_F_END_ON_RESCHED = 2,
+};
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
+ unsigned flags, u16 budget)
{
bool skip_schedule = false;
unsigned long timeout;
@@ -6204,7 +6319,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_disable();
- if (prefer_busy_poll) {
+ if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
timeout = READ_ONCE(napi->dev->gro_flush_timeout);
if (napi->defer_hard_irqs_count && timeout) {
@@ -6228,23 +6343,23 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_enable();
}
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+static void __napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, unsigned flags, u16 budget)
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
restart:
napi_poll = NULL;
- rcu_read_lock();
-
napi = napi_by_id(napi_id);
if (!napi)
- goto out;
+ return;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
@@ -6260,14 +6375,14 @@ restart:
*/
if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
NAPIF_STATE_IN_BUSY_POLL)) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
if (cmpxchg(&napi->state, val,
val | NAPIF_STATE_IN_BUSY_POLL |
NAPIF_STATE_SCHED) != val) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
@@ -6281,18 +6396,22 @@ count:
if (work > 0)
__NET_ADD_STATS(dev_net(napi->dev),
LINUX_MIB_BUSYPOLLRXPACKETS, work);
+ skb_defer_free_flush(this_cpu_ptr(&softnet_data));
local_bh_enable();
if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
+ if (flags & NAPI_F_END_ON_RESCHED)
+ break;
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
rcu_read_unlock();
cond_resched();
+ rcu_read_lock();
if (loop_end(loop_end_arg, start_time))
return;
goto restart;
@@ -6300,10 +6419,31 @@ count:
cpu_relax();
}
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
-out:
+}
+
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = NAPI_F_END_ON_RESCHED;
+
+ if (prefer_busy_poll)
+ flags |= NAPI_F_PREFER_BUSY_POLL;
+
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+}
+
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
rcu_read_unlock();
}
EXPORT_SYMBOL(napi_busy_loop);
@@ -6391,7 +6531,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
}
- dev->threaded = threaded;
+ WRITE_ONCE(dev->threaded, threaded);
/* Make sure kthread is created before THREADED bit
* is set.
@@ -6482,7 +6622,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = 0;
+ dev->threaded = false;
netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -6660,8 +6800,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
static int napi_thread_wait(struct napi_struct *napi)
{
- bool woken = false;
-
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
@@ -6670,15 +6808,13 @@ static int napi_thread_wait(struct napi_struct *napi)
* Testing SCHED bit is not enough because SCHED bit might be
* set by some other busy poll thread or by napi_disable().
*/
- if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state)) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
return 0;
}
schedule();
- /* woken being true indicates this thread owns this napi. */
- woken = true;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
@@ -6686,64 +6822,48 @@ static int napi_thread_wait(struct napi_struct *napi)
return -1;
}
-static void skb_defer_free_flush(struct softnet_data *sd)
+static void napi_threaded_poll_loop(struct napi_struct *napi)
{
- struct sk_buff *skb, *next;
+ struct softnet_data *sd;
+ unsigned long last_qs = jiffies;
- /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
- if (!READ_ONCE(sd->defer_list))
- return;
+ for (;;) {
+ bool repoll = false;
+ void *have;
- spin_lock(&sd->defer_lock);
- skb = sd->defer_list;
- sd->defer_list = NULL;
- sd->defer_count = 0;
- spin_unlock(&sd->defer_lock);
+ local_bh_disable();
+ sd = this_cpu_ptr(&softnet_data);
+ sd->in_napi_threaded_poll = true;
- while (skb != NULL) {
- next = skb->next;
- napi_consume_skb(skb, 1);
- skb = next;
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ sd->in_napi_threaded_poll = false;
+ barrier();
+
+ if (sd_has_rps_ipi_waiting(sd)) {
+ local_irq_disable();
+ net_rps_action_and_irq_enable(sd);
+ }
+ skb_defer_free_flush(sd);
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ rcu_softirq_qs_periodic(last_qs);
+ cond_resched();
}
}
static int napi_threaded_poll(void *data)
{
struct napi_struct *napi = data;
- struct softnet_data *sd;
- void *have;
-
- while (!napi_thread_wait(napi)) {
- unsigned long last_qs = jiffies;
-
- for (;;) {
- bool repoll = false;
-
- local_bh_disable();
- sd = this_cpu_ptr(&softnet_data);
- sd->in_napi_threaded_poll = true;
-
- have = netpoll_poll_lock(napi);
- __napi_poll(napi, &repoll);
- netpoll_poll_unlock(have);
- sd->in_napi_threaded_poll = false;
- barrier();
-
- if (sd_has_rps_ipi_waiting(sd)) {
- local_irq_disable();
- net_rps_action_and_irq_enable(sd);
- }
- skb_defer_free_flush(sd);
- local_bh_enable();
-
- if (!repoll)
- break;
+ while (!napi_thread_wait(napi))
+ napi_threaded_poll_loop(napi);
- rcu_softirq_qs_periodic(last_qs);
- cond_resched();
- }
- }
return 0;
}
@@ -6751,8 +6871,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
- int budget = READ_ONCE(netdev_budget);
+ usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs));
+ int budget = READ_ONCE(net_hotdata.netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -8424,27 +8544,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags;
+ unsigned int promiscuity, flags;
kuid_t uid;
kgid_t gid;
ASSERT_RTNL();
- dev->flags |= IFF_PROMISC;
- dev->promiscuity += inc;
- if (dev->promiscuity == 0) {
+ promiscuity = dev->promiscuity + inc;
+ if (promiscuity == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch promisc and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_PROMISC;
- else {
- dev->promiscuity -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_PROMISC;
+ } else {
+ flags = old_flags | IFF_PROMISC;
}
- if (dev->flags != old_flags) {
+ WRITE_ONCE(dev->promiscuity, promiscuity);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s promiscuous mode\n",
dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) {
@@ -8495,25 +8617,27 @@ EXPORT_SYMBOL(dev_set_promiscuity);
static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
+ unsigned int allmulti, flags;
ASSERT_RTNL();
- dev->flags |= IFF_ALLMULTI;
- dev->allmulti += inc;
- if (dev->allmulti == 0) {
+ allmulti = dev->allmulti + inc;
+ if (allmulti == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch allmulti and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_ALLMULTI;
- else {
- dev->allmulti -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_ALLMULTI;
+ } else {
+ flags = old_flags | IFF_ALLMULTI;
}
- if (dev->flags ^ old_flags) {
+ WRITE_ONCE(dev->allmulti, allmulti);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s allmulticast mode\n",
dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI);
@@ -8595,12 +8719,12 @@ unsigned int dev_get_flags(const struct net_device *dev)
{
unsigned int flags;
- flags = (dev->flags & ~(IFF_PROMISC |
+ flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC |
IFF_ALLMULTI |
IFF_RUNNING |
IFF_LOWER_UP |
IFF_DORMANT)) |
- (dev->gflags & (IFF_PROMISC |
+ (READ_ONCE(dev->gflags) & (IFF_PROMISC |
IFF_ALLMULTI));
if (netif_running(dev)) {
@@ -8839,7 +8963,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
return -ERANGE;
if (new_len != orig_len) {
- dev->tx_queue_len = new_len;
+ WRITE_ONCE(dev->tx_queue_len, new_len);
res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
res = notifier_to_errno(res);
if (res)
@@ -8853,7 +8977,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
err_rollback:
netdev_err(dev, "refused to change device tx_queue_len\n");
- dev->tx_queue_len = orig_len;
+ WRITE_ONCE(dev->tx_queue_len, orig_len);
return res;
}
@@ -8923,7 +9047,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
}
EXPORT_SYMBOL(dev_set_mac_address);
-static DECLARE_RWSEM(dev_addr_sem);
+DECLARE_RWSEM(dev_addr_sem);
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack)
@@ -9099,7 +9223,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
netif_carrier_off(dev);
else
netif_carrier_on(dev);
- dev->proto_down = proto_down;
+ WRITE_ONCE(dev->proto_down, proto_down);
return 0;
}
@@ -9113,18 +9237,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
u32 value)
{
+ u32 proto_down_reason;
int b;
if (!mask) {
- dev->proto_down_reason = value;
+ proto_down_reason = value;
} else {
+ proto_down_reason = dev->proto_down_reason;
for_each_set_bit(b, &mask, 32) {
if (value & (1 << b))
- dev->proto_down_reason |= BIT(b);
+ proto_down_reason |= BIT(b);
else
- dev->proto_down_reason &= ~BIT(b);
+ proto_down_reason &= ~BIT(b);
}
}
+ WRITE_ONCE(dev->proto_down_reason, proto_down_reason);
}
struct bpf_xdp_link {
@@ -9677,11 +9804,11 @@ static void dev_index_release(struct net *net, int ifindex)
/* Delayed registration/unregisteration */
LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+atomic_t dev_unreg_count = ATOMIC_INIT(0);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -10246,9 +10373,9 @@ int register_netdevice(struct net_device *dev)
goto err_ifindex_release;
ret = netdev_register_kobject(dev);
- write_lock(&dev_base_lock);
- dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
- write_unlock(&dev_base_lock);
+
+ WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
+
if (ret)
goto err_uninit_notify;
@@ -10314,25 +10441,12 @@ err_free_name:
}
EXPORT_SYMBOL(register_netdevice);
-/**
- * init_dummy_netdev - init a dummy network device for NAPI
- * @dev: device to init
- *
- * This takes a network device structure and initialize the minimum
- * amount of fields so it can be used to schedule NAPI polls without
- * registering a full blown interface. This is to be used by drivers
- * that need to tie several hardware interfaces to a single NAPI
- * poll scheduler due to HW limitations.
+/* Initialize the core of a dummy net device.
+ * This is useful if you are calling this function after alloc_netdev(),
+ * since it does not memset the net_device fields.
*/
-int init_dummy_netdev(struct net_device *dev)
+static void init_dummy_netdev_core(struct net_device *dev)
{
- /* Clear everything. Note we don't initialize spinlocks
- * are they aren't supposed to be taken by any of the
- * NAPI code and this dummy netdev is supposed to be
- * only ever used for NAPI polls
- */
- memset(dev, 0, sizeof(struct net_device));
-
/* make sure we BUG if trying to hit standard
* register/unregister code path
*/
@@ -10352,12 +10466,30 @@ int init_dummy_netdev(struct net_device *dev)
* because users of this 'device' dont need to change
* its refcount.
*/
+}
- return 0;
+/**
+ * init_dummy_netdev - init a dummy network device for NAPI
+ * @dev: device to init
+ *
+ * This takes a network device structure and initializes the minimum
+ * amount of fields so it can be used to schedule NAPI polls without
+ * registering a full blown interface. This is to be used by drivers
+ * that need to tie several hardware interfaces to a single NAPI
+ * poll scheduler due to HW limitations.
+ */
+void init_dummy_netdev(struct net_device *dev)
+{
+ /* Clear everything. Note we don't initialize spinlocks
+ * as they aren't supposed to be taken by any of the
+ * NAPI code and this dummy netdev is supposed to be
+ * only ever used for NAPI polls
+ */
+ memset(dev, 0, sizeof(struct net_device));
+ init_dummy_netdev_core(dev);
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
-
/**
* register_netdev - register a network device
* @dev: device to register
@@ -10455,8 +10587,9 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
rebroadcast_time = jiffies;
}
+ rcu_barrier();
+
if (!wait) {
- rcu_barrier();
wait = WAIT_REFS_MIN_MSECS;
} else {
msleep(wait);
@@ -10508,6 +10641,7 @@ void netdev_run_todo(void)
{
struct net_device *dev, *tmp;
struct list_head list;
+ int cnt;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -10538,12 +10672,11 @@ void netdev_run_todo(void)
continue;
}
- write_lock(&dev_base_lock);
- dev->reg_state = NETREG_UNREGISTERED;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
linkwatch_sync_dev(dev);
}
+ cnt = 0;
while (!list_empty(&list)) {
dev = netdev_wait_allrefs_any(&list);
list_del(&dev->todo_list);
@@ -10561,12 +10694,13 @@ void netdev_run_todo(void)
if (dev->needs_free_netdev)
free_netdev(dev);
- if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
- wake_up(&netdev_unregistering_wq);
+ cnt++;
/* Free network device */
kobject_put(&dev->dev.kobj);
}
+ if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
}
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
@@ -10643,6 +10777,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
ops->ndo_get_stats64(dev, storage);
} else if (ops->ndo_get_stats) {
netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+ } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
+ dev_get_tstats64(dev, storage);
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
@@ -10951,13 +11087,14 @@ void free_netdev(struct net_device *dev)
dev->xdp_bulkq = NULL;
/* Compatibility with error handling in drivers */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
+ if (dev->reg_state == NETREG_UNINITIALIZED ||
+ dev->reg_state == NETREG_DUMMY) {
netdev_freemem(dev);
return;
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
- dev->reg_state = NETREG_RELEASED;
+ WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
/* will free via device release */
put_device(&dev->dev);
@@ -10965,6 +11102,19 @@ void free_netdev(struct net_device *dev)
EXPORT_SYMBOL(free_netdev);
/**
+ * alloc_netdev_dummy - Allocate and initialize a dummy net device.
+ * @sizeof_priv: size of private data to allocate space for
+ *
+ * Return: the allocated net_device on success, NULL otherwise
+ */
+struct net_device *alloc_netdev_dummy(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "dummy#", NET_NAME_UNKNOWN,
+ init_dummy_netdev_core);
+}
+EXPORT_SYMBOL_GPL(alloc_netdev_dummy);
+
+/**
* synchronize_net - Synchronize with packet receive processing
*
* Wait for packets currently being received to be done.
@@ -11013,6 +11163,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
{
struct net_device *dev, *tmp;
LIST_HEAD(close_head);
+ int cnt = 0;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -11044,10 +11195,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- write_lock(&dev_base_lock);
- unlist_netdevice(dev, false);
- dev->reg_state = NETREG_UNREGISTERING;
- write_unlock(&dev_base_lock);
+ unlist_netdevice(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
}
flush_all_backlogs();
@@ -11109,7 +11258,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
+ cnt++;
}
+ atomic_add(cnt, &dev_unreg_count);
list_del(head);
}
@@ -11227,7 +11378,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev, true);
+ unlist_netdevice(dev);
synchronize_net();
@@ -11266,8 +11417,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- if (new_name[0]) /* Rename the netdev to prepared name */
+ if (new_name[0]) {
+ /* Rename the netdev to prepared name */
+ write_seqlock_bh(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
+ write_sequnlock_bh(&netdev_rename_lock);
+ }
/* Fixup kobjects */
dev_set_uevent_suppress(&dev->dev, 1);
@@ -11342,7 +11497,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
list_del_init(&napi->poll_list);
if (napi->poll == process_backlog)
- napi->state = 0;
+ napi->state &= NAPIF_STATE_THREADED;
else
____napi_schedule(sd, napi);
}
@@ -11350,21 +11505,23 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ if (!use_backlog_threads()) {
#ifdef CONFIG_RPS
- remsd = oldsd->rps_ipi_list;
- oldsd->rps_ipi_list = NULL;
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
#endif
- /* send out pending IPI's on offline CPU */
- net_rps_send_ipi(remsd);
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+ }
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
return 0;
@@ -11563,11 +11720,8 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%%d");
netdev_for_each_altname_safe(dev, name_node, tmp)
- if (netdev_name_in_use(&init_net, name_node->name)) {
- netdev_name_node_del(name_node);
- synchronize_rcu();
+ if (netdev_name_in_use(&init_net, name_node->name))
__netdev_name_node_alt_destroy(name_node);
- }
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
@@ -11675,6 +11829,60 @@ static void __init net_dev_struct_check(void)
*
*/
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ struct page_pool_params page_pool_params = {
+ .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+ .flags = PP_FLAG_SYSTEM_POOL,
+ .nid = cpu_to_mem(cpuid),
+ };
+ struct page_pool *pp_ptr;
+
+ pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+ if (IS_ERR(pp_ptr))
+ return -ENOMEM;
+
+ per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+ return 0;
+}
+
+static int backlog_napi_should_run(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+ struct napi_struct *napi = &sd->backlog;
+
+ return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+}
+
+static void run_backlog_napi(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+
+ napi_threaded_poll_loop(&sd->backlog);
+}
+
+static void backlog_napi_setup(unsigned int cpu)
+{
+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+ struct napi_struct *napi = &sd->backlog;
+
+ napi->thread = this_cpu_read(backlog_napi);
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+}
+
+static struct smp_hotplug_thread backlog_threads = {
+ .store = &backlog_napi,
+ .thread_should_run = backlog_napi_should_run,
+ .thread_fn = run_backlog_napi,
+ .thread_comm = "backlog_napi/%u",
+ .setup = backlog_napi_setup,
+};
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11693,7 +11901,6 @@ static int __init net_dev_init(void)
if (netdev_kobject_init())
goto out;
- INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
@@ -11727,7 +11934,13 @@ static int __init net_dev_init(void)
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+ INIT_LIST_HEAD(&sd->backlog.poll_list);
+
+ if (net_page_pool_create(i))
+ goto out;
}
+ if (use_backlog_threads())
+ smpboot_register_percpu_thread(&backlog_threads);
dev_boot_phase = 0;
@@ -11753,7 +11966,24 @@ static int __init net_dev_init(void)
NULL, dev_cpu_dead);
WARN_ON(rc < 0);
rc = 0;
+
+ /* avoid static key IPIs to isolated CPUs */
+ if (housekeeping_enabled(HK_TYPE_MISC))
+ net_enable_timestamp();
out:
+ if (rc < 0) {
+ for_each_possible_cpu(i) {
+ struct page_pool *pp_ptr;
+
+ pp_ptr = per_cpu(system_page_pool, i);
+ if (!pp_ptr)
+ continue;
+
+ page_pool_destroy(pp_ptr);
+ per_cpu(system_page_pool, i) = NULL;
+ }
+ }
+
return rc;
}
diff --git a/net/core/dev.h b/net/core/dev.h
index 7480b4c842..b7b518bc2b 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -3,11 +3,10 @@
#define _NET_CORE_DEV_H
#include <linux/types.h>
+#include <linux/rwsem.h>
+#include <linux/netdevice.h>
struct net;
-struct net_device;
-struct netdev_bpf;
-struct netdev_phys_item_id;
struct netlink_ext_ack;
struct cpumask;
@@ -37,15 +36,13 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern int netdev_budget;
-extern unsigned int netdev_budget_usecs;
-extern unsigned int sysctl_skb_defer_max;
-extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
extern int dev_weight_tx_bias;
+extern struct rw_semaphore dev_addr_sem;
+
/* rtnl helpers */
extern struct list_head net_todo_list;
void netdev_run_todo(void);
@@ -56,6 +53,7 @@ struct netdev_name_node {
struct list_head list;
struct net_device *dev;
const char *name;
+ struct rcu_head rcu;
};
int netdev_get_name(struct net *net, char *name, int ifindex);
@@ -149,4 +147,23 @@ static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
struct napi_struct *napi_by_id(unsigned int napi_id);
+void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+
+#define XMIT_RECURSION_LIMIT 8
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+ XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ __this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.recursion);
+}
+
#endif
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index 4dbd0dc6ae..8e1dba825e 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -49,7 +49,6 @@ static int dev_addr_test_init(struct kunit *test)
KUNIT_FAIL(test, "Can't register netdev %d", err);
}
- rtnl_lock();
return 0;
}
@@ -57,7 +56,6 @@ static void dev_addr_test_exit(struct kunit *test)
{
struct net_device *netdev = test->priv;
- rtnl_unlock();
unregister_netdev(netdev);
free_netdev(netdev);
}
@@ -67,6 +65,7 @@ static void dev_addr_test_basic(struct kunit *test)
struct net_device *netdev = test->priv;
u8 addr[ETH_ALEN];
+ rtnl_lock();
KUNIT_EXPECT_TRUE(test, !!netdev->dev_addr);
memset(addr, 2, sizeof(addr));
@@ -76,6 +75,7 @@ static void dev_addr_test_basic(struct kunit *test)
memset(addr, 3, sizeof(addr));
dev_addr_set(netdev, addr);
KUNIT_EXPECT_MEMEQ(test, netdev->dev_addr, addr, sizeof(addr));
+ rtnl_unlock();
}
static void dev_addr_test_sync_one(struct kunit *test)
@@ -86,6 +86,7 @@ static void dev_addr_test_sync_one(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
memset(addr, 1, sizeof(addr));
eth_hw_addr_set(netdev, addr);
@@ -103,6 +104,7 @@ static void dev_addr_test_sync_one(struct kunit *test)
* considered synced and we overwrite in place.
*/
KUNIT_EXPECT_EQ(test, 0, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_add_del(struct kunit *test)
@@ -114,6 +116,7 @@ static void dev_addr_test_add_del(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
for (i = 1; i < 4; i++) {
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, 0, dev_addr_add(netdev, addr,
@@ -143,6 +146,7 @@ static void dev_addr_test_add_del(struct kunit *test)
__hw_addr_sync_dev(&netdev->dev_addrs, netdev, dev_addr_test_sync,
dev_addr_test_unsync);
KUNIT_EXPECT_EQ(test, 1, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_del_main(struct kunit *test)
@@ -150,6 +154,7 @@ static void dev_addr_test_del_main(struct kunit *test)
struct net_device *netdev = test->priv;
u8 addr[ETH_ALEN];
+ rtnl_lock();
memset(addr, 1, sizeof(addr));
eth_hw_addr_set(netdev, addr);
@@ -161,6 +166,7 @@ static void dev_addr_test_del_main(struct kunit *test)
NETDEV_HW_ADDR_T_LAN));
KUNIT_EXPECT_EQ(test, -ENOENT, dev_addr_del(netdev, addr,
NETDEV_HW_ADDR_T_LAN));
+ rtnl_unlock();
}
static void dev_addr_test_add_set(struct kunit *test)
@@ -172,6 +178,7 @@ static void dev_addr_test_add_set(struct kunit *test)
datp = netdev_priv(netdev);
+ rtnl_lock();
/* There is no external API like dev_addr_add_excl(),
* so shuffle the tree a little bit and exploit aliasing.
*/
@@ -191,6 +198,7 @@ static void dev_addr_test_add_set(struct kunit *test)
__hw_addr_sync_dev(&netdev->dev_addrs, netdev, dev_addr_test_sync,
dev_addr_test_unsync);
KUNIT_EXPECT_EQ(test, 0xffff, datp->addr_seen);
+ rtnl_unlock();
}
static void dev_addr_test_add_excl(struct kunit *test)
@@ -199,6 +207,7 @@ static void dev_addr_test_add_excl(struct kunit *test)
u8 addr[ETH_ALEN];
int i;
+ rtnl_lock();
for (i = 0; i < 10; i++) {
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, 0, dev_uc_add_excl(netdev, addr));
@@ -213,6 +222,7 @@ static void dev_addr_test_add_excl(struct kunit *test)
memset(addr, i, sizeof(addr));
KUNIT_EXPECT_EQ(test, -EEXIST, dev_uc_add_excl(netdev, addr));
}
+ rtnl_unlock();
}
static struct kunit_case dev_addr_test_cases[] = {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b0f221d658..430ed18f85 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -74,7 +74,7 @@ struct net_dm_hw_entries {
};
struct per_cpu_dm_data {
- spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+ raw_spinlock_t lock; /* Protects 'skb', 'hw_entries' and
* 'send_timer'
*/
union {
@@ -168,9 +168,9 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
err:
mod_timer(&data->send_timer, jiffies + HZ / 10);
out:
- spin_lock_irqsave(&data->lock, flags);
+ raw_spin_lock_irqsave(&data->lock, flags);
swap(data->skb, skb);
- spin_unlock_irqrestore(&data->lock, flags);
+ raw_spin_unlock_irqrestore(&data->lock, flags);
if (skb) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
@@ -225,7 +225,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
local_irq_save(flags);
data = this_cpu_ptr(&dm_cpu_data);
- spin_lock(&data->lock);
+ raw_spin_lock(&data->lock);
dskb = data->skb;
if (!dskb)
@@ -259,7 +259,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
}
out:
- spin_unlock_irqrestore(&data->lock, flags);
+ raw_spin_unlock_irqrestore(&data->lock, flags);
}
static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
@@ -314,9 +314,9 @@ net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
}
- spin_lock_irqsave(&hw_data->lock, flags);
+ raw_spin_lock_irqsave(&hw_data->lock, flags);
swap(hw_data->hw_entries, hw_entries);
- spin_unlock_irqrestore(&hw_data->lock, flags);
+ raw_spin_unlock_irqrestore(&hw_data->lock, flags);
return hw_entries;
}
@@ -448,7 +448,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
return;
hw_data = this_cpu_ptr(&dm_hw_cpu_data);
- spin_lock_irqsave(&hw_data->lock, flags);
+ raw_spin_lock_irqsave(&hw_data->lock, flags);
hw_entries = hw_data->hw_entries;
if (!hw_entries)
@@ -477,7 +477,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
}
out:
- spin_unlock_irqrestore(&hw_data->lock, flags);
+ raw_spin_unlock_irqrestore(&hw_data->lock, flags);
}
static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
@@ -1673,7 +1673,7 @@ static struct notifier_block dropmon_net_notifier = {
static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
{
- spin_lock_init(&data->lock);
+ raw_spin_lock_init(&data->lock);
skb_queue_head_init(&data->drop_queue);
u64_stats_init(&data->stats.syncp);
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 6838d3212c..95f533844f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -96,7 +96,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
}
EXPORT_SYMBOL(dst_alloc);
-struct dst_entry *dst_destroy(struct dst_entry * dst)
+static void dst_destroy(struct dst_entry *dst)
{
struct dst_entry *child = NULL;
@@ -126,15 +126,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
dst = child;
if (dst)
dst_release_immediate(dst);
- return NULL;
}
-EXPORT_SYMBOL(dst_destroy);
static void dst_destroy_rcu(struct rcu_head *head)
{
struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
- dst = dst_destroy(dst);
+ dst_destroy(dst);
}
/* Operations to mark dst as DEAD and clean up the net device referenced
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 0ccfd5fa5c..70c634b9e7 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -27,6 +27,7 @@ struct dst_cache_pcpu {
static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
struct dst_entry *dst, u32 cookie)
{
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
dst_release(dst_cache->dst);
if (dst)
dst_hold(dst);
@@ -40,6 +41,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
{
struct dst_entry *dst;
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
dst = idst->dst;
if (!dst)
goto fail;
@@ -47,7 +49,8 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
/* the cache already hold a dst reference; it can't go away */
dst_hold(dst);
- if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+ if (unlikely(!time_after(idst->refresh_ts,
+ READ_ONCE(dst_cache->reset_ts)) ||
(dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
@@ -83,7 +86,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
return NULL;
*saddr = idst->in_saddr.s_addr;
- return container_of(dst, struct rtable, dst);
+ return dst_rtable(dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
@@ -111,8 +114,8 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
return;
idst = this_cpu_ptr(dst_cache->cache);
- dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
- rt6_get_cookie((struct rt6_info *)dst));
+ dst_cache_per_cpu_dst_set(idst, dst,
+ rt6_get_cookie(dst_rt6_info(dst)));
idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
@@ -170,7 +173,7 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
if (!dst_cache->cache)
return;
- dst_cache->reset_ts = jiffies;
+ dst_cache_reset(dst_cache);
for_each_possible_cpu(i) {
struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
struct dst_entry *dst = idst->dst;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3f933ffcef..6ebffbc632 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1142,10 +1142,10 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct fib_rules_ops *ops;
- int idx = 0, family;
+ int err, idx = 0, family;
if (cb->strict_check) {
- int err = fib_valid_dumprule_req(nlh, cb->extack);
+ err = fib_valid_dumprule_req(nlh, cb->extack);
if (err < 0)
return err;
@@ -1158,17 +1158,17 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
if (ops == NULL)
return -EAFNOSUPPORT;
- dump_rules(skb, cb, ops);
-
- return skb->len;
+ return dump_rules(skb, cb, ops);
}
+ err = 0;
rcu_read_lock();
list_for_each_entry_rcu(ops, &net->rules_ops, list) {
if (idx < cb->args[0] || !try_module_get(ops->owner))
goto skip;
- if (dump_rules(skb, cb, ops) < 0)
+ err = dump_rules(skb, cb, ops);
+ if (err < 0)
break;
cb->args[1] = 0;
@@ -1178,7 +1178,7 @@ skip:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static void notify_rule_change(int event, struct fib_rule *rule,
@@ -1293,7 +1293,8 @@ static int __init fib_rules_init(void)
int err;
rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule,
+ RTNL_FLAG_DUMP_UNLOCKED);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index ef3e78b6a3..ab0455c64e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,8 +87,11 @@
#include "dev.h"
+/* Keep the struct bpf_fib_lookup small so that it fits into a cacheline */
+static_assert(sizeof(struct bpf_fib_lookup) == 64, "struct bpf_fib_lookup size check");
+
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id);
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
@@ -778,7 +781,7 @@ jmp_rest:
BPF_EMIT_JMP;
break;
- /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
+ /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B: {
struct sock_filter tmp = {
.code = BPF_LD | BPF_ABS | BPF_B,
@@ -804,7 +807,7 @@ jmp_rest:
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
}
- /* RET_K is remaped into 2 insns. RET_A case doesn't need an
+ /* RET_K is remapped into 2 insns. RET_A case doesn't need an
* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
*/
case BPF_RET | BPF_A:
@@ -1662,6 +1665,11 @@ static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
static inline int __bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
+#ifdef CONFIG_DEBUG_NET
+ /* Avoid a splat in pskb_may_pull_reason() */
+ if (write_len > INT_MAX)
+ return -EINVAL;
+#endif
return skb_ensure_writable(skb, write_len);
}
@@ -2215,7 +2223,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
- nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ nexthop = rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr);
} else {
nexthop = &nh->ipv6_nh;
@@ -2271,12 +2279,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
err = bpf_out_neigh_v6(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
goto out_xmit;
out_drop:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
out_xmit:
return ret;
@@ -2314,8 +2322,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct rtable *rt = skb_rtable(skb);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
@@ -2378,12 +2385,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
err = bpf_out_neigh_v4(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
goto out_xmit;
out_drop:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
out_xmit:
return ret;
@@ -2968,7 +2975,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*
* Then if B is non-zero AND there is no space allocate space and
* compact A, B regions into page. If there is space shift ring to
- * the rigth free'ing the next element in ring to place B, leaving
+ * the right free'ing the next element in ring to place B, leaving
* A untouched except to reduce length.
*/
if (start != offset) {
@@ -3537,13 +3544,20 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* Due to header grow, MSS needs to be downgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_decrease_gso_size(shinfo, len_diff);
-
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
+
+ /* Due to header growth, MSS needs to be downgraded.
+ * There is a BUG_ON() when segmenting the frag_list with
+ * head_frag true, so linearize the skb after downgrading
+ * the MSS.
+ */
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
+ skb_decrease_gso_size(shinfo, len_diff);
+ if (shinfo->frag_list)
+ return skb_linearize(skb);
+ }
}
return 0;
@@ -4360,10 +4374,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (unlikely(!xdpf)) {
@@ -4375,11 +4391,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_enqueue_multi(xdpf, dev, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_enqueue(fwd, xdpf, dev);
}
@@ -4442,9 +4467,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog,
- void *fwd,
- enum bpf_map_type map_type, u32 map_id)
+ struct bpf_prog *xdp_prog, void *fwd,
+ enum bpf_map_type map_type, u32 map_id,
+ u32 flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map;
@@ -4454,11 +4479,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
}
@@ -4495,9 +4529,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
@@ -4517,7 +4553,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return 0;
}
- return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
+ return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
err:
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;
@@ -4662,7 +4698,7 @@ set_compat:
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
if (flags & BPF_F_TUNINFO_FLAGS)
- to->tunnel_flags = info->key.tun_flags;
+ to->tunnel_flags = ip_tunnel_flags_to_be16(info->key.tun_flags);
else
to->tunnel_ext = 0;
@@ -4705,7 +4741,7 @@ BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
int err;
if (unlikely(!info ||
- !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
+ !ip_tunnel_is_options_present(info->key.tun_flags))) {
err = -ENOENT;
goto err_clear;
}
@@ -4775,15 +4811,15 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
- if (flags & BPF_F_DONT_FRAGMENT)
- info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
- if (flags & BPF_F_SEQ_NUMBER)
- info->key.tun_flags |= TUNNEL_SEQ;
- if (flags & BPF_F_NO_TUNNEL_KEY)
- info->key.tun_flags &= ~TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags);
+ __assign_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags,
+ flags & BPF_F_DONT_FRAGMENT);
+ __assign_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags,
+ !(flags & BPF_F_ZERO_CSUM_TX));
+ __assign_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags,
+ flags & BPF_F_SEQ_NUMBER);
+ __assign_bit(IP_TUNNEL_KEY_BIT, info->key.tun_flags,
+ !(flags & BPF_F_NO_TUNNEL_KEY));
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -4821,13 +4857,15 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
const struct metadata_dst *md = this_cpu_ptr(md_dst);
+ IP_TUNNEL_DECLARE_FLAGS(present) = { };
if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
return -EINVAL;
if (unlikely(size > IP_TUNNEL_OPTS_MAX))
return -ENOMEM;
- ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
+ ip_tunnel_set_options_present(present);
+ ip_tunnel_info_opts_set(info, from, size, present);
return 0;
}
@@ -5884,7 +5922,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
- fl4.flowi4_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl4.flowi4_mark = params->mark;
+ else
+ fl4.flowi4_mark = 0;
fl4.flowi4_secid = 0;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -5988,7 +6029,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
return -ENODEV;
idev = __in6_dev_get_safely(dev);
- if (unlikely(!idev || !idev->cnf.forwarding))
+ if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding)))
return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
@@ -6027,7 +6068,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
strict);
} else {
- fl6.flowi6_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl6.flowi6_mark = params->mark;
+ else
+ fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -6105,7 +6149,7 @@ set_fwd_params:
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
- BPF_FIB_LOOKUP_SRC)
+ BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
@@ -7894,7 +7938,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -7987,7 +8031,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8006,7 +8050,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8193,7 +8237,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8252,13 +8296,13 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
/* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
* kfuncs are defined in two different modules, and we want to be able
- * to use them interchangably with the same BTF type ID. Because modules
+ * to use them interchangeably with the same BTF type ID. Because modules
* can't de-duplicate BTF IDs between each other, we need the type to be
* referenced in the vmlinux BTF or the verifier will get confused about
* the different types. So we add this dummy type reference which will
@@ -8313,7 +8357,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8342,8 +8386,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_event_output_data_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
@@ -8355,7 +8397,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8399,7 +8441,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8410,7 +8452,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8437,7 +8479,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8612,7 +8654,7 @@ static bool cg_skb_is_valid_access(int off, int size,
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
}
@@ -8624,7 +8666,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
case bpf_ctx_range(struct __sk_buff, tstamp):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
default:
@@ -11268,7 +11310,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -11450,7 +11492,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -11784,7 +11826,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = {
};
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id)
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
const struct bpf_func_proto *func;
@@ -11813,10 +11855,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
return func;
@@ -11869,6 +11911,103 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
return 0;
}
+
+__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz)
+{
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ const struct request_sock_ops *ops;
+ struct inet_request_sock *ireq;
+ struct tcp_request_sock *treq;
+ struct request_sock *req;
+ struct net *net;
+ __u16 min_mss;
+ u32 tsoff = 0;
+
+ if (attrs__sz != sizeof(*attrs) ||
+ attrs->reserved[0] || attrs->reserved[1] || attrs->reserved[2])
+ return -EINVAL;
+
+ if (!skb_at_tc_ingress(skb))
+ return -EINVAL;
+
+ net = dev_net(skb->dev);
+ if (net != sock_net(sk))
+ return -ENETUNREACH;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ops = &tcp_request_sock_ops;
+ min_mss = 536;
+ break;
+#if IS_BUILTIN(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ ops = &tcp6_request_sock_ops;
+ min_mss = IPV6_MIN_MTU - 60;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN ||
+ sk_is_mptcp(sk))
+ return -EINVAL;
+
+ if (attrs->mss < min_mss)
+ return -EINVAL;
+
+ if (attrs->wscale_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling))
+ return -EINVAL;
+
+ if (attrs->snd_wscale > TCP_MAX_WSCALE ||
+ attrs->rcv_wscale > TCP_MAX_WSCALE)
+ return -EINVAL;
+ }
+
+ if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
+ return -EINVAL;
+
+ if (attrs->tstamp_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
+ return -EINVAL;
+
+ tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(attrs->usec_ts_ok, tcp_clock_ns());
+ }
+
+ req = inet_reqsk_alloc(ops, sk, false);
+ if (!req)
+ return -ENOMEM;
+
+ ireq = inet_rsk(req);
+ treq = tcp_rsk(req);
+
+ req->rsk_listener = sk;
+ req->syncookie = 1;
+ req->mss = attrs->mss;
+ req->ts_recent = attrs->rcv_tsval;
+
+ ireq->snd_wscale = attrs->snd_wscale;
+ ireq->rcv_wscale = attrs->rcv_wscale;
+ ireq->tstamp_ok = !!attrs->tstamp_ok;
+ ireq->sack_ok = !!attrs->sack_ok;
+ ireq->wscale_ok = !!attrs->wscale_ok;
+ ireq->ecn_ok = !!attrs->ecn_ok;
+
+ treq->req_usec_ts = !!attrs->usec_ts_ok;
+ treq->ts_off = tsoff;
+
+ skb_orphan(skb);
+ skb->sk = req_to_sk(req);
+ skb->destructor = sock_pfree;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
@@ -11885,17 +12024,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
return 0;
}
-BTF_SET8_START(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
-BTF_SET8_END(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
-BTF_SET8_START(bpf_kfunc_check_set_xdp)
+BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
-BTF_SET8_END(bpf_kfunc_check_set_xdp)
+BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
-BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
+BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
-BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr)
+
+BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk)
+BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk)
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
@@ -11912,6 +12055,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
.set = &bpf_kfunc_check_set_sock_addr,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_tcp_reqsk,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -11927,8 +12075,9 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- &bpf_kfunc_set_sock_addr);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ &bpf_kfunc_set_sock_addr);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
}
late_initcall(bpf_kfunc_init);
@@ -11968,9 +12117,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
__bpf_kfunc_end_defs();
-BTF_SET8_START(bpf_sk_iter_kfunc_ids)
+BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
-BTF_SET8_END(bpf_sk_iter_kfunc_ids)
+BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids)
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 272f092513..7b54f44f53 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -455,17 +455,25 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
struct flow_dissector_key_enc_opts *enc_opt;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+ u32 val;
enc_opt = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS,
target_container);
- if (info->options_len) {
- enc_opt->len = info->options_len;
- ip_tunnel_info_opts_get(enc_opt->data, info);
- enc_opt->dst_opt_type = info->key.tun_flags &
- TUNNEL_OPTIONS_PRESENT;
- }
+ if (!info->options_len)
+ return;
+
+ enc_opt->len = info->options_len;
+ ip_tunnel_info_opts_get(enc_opt->data, info);
+
+ ip_tunnel_set_options_present(flags);
+ ip_tunnel_flags_and(flags, info->key.tun_flags, flags);
+
+ val = find_next_bit(flags, __IP_TUNNEL_FLAG_NUM,
+ IP_TUNNEL_GENEVE_OPT_BIT);
+ enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0;
}
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
@@ -1093,7 +1101,7 @@ bool __skb_flow_dissect(const struct net *net,
}
}
- WARN_ON_ONCE(!net);
+ DEBUG_NET_WARN_ON_ONCE(!net);
if (net) {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog_array *run_array;
diff --git a/net/core/gro.c b/net/core/gro.c
index cefddf65f7..b3b43de1a6 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -3,6 +3,7 @@
#include <net/dst_metadata.h>
#include <net/busy_poll.h>
#include <trace/events/net.h>
+#include <linux/skbuff_ref.h>
#define MAX_GRO_SKBS 8
@@ -10,9 +11,6 @@
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(offload_lock);
-struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
-/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
-int gro_normal_batch __read_mostly = 8;
/**
* dev_add_offload - register offload handlers
@@ -31,7 +29,7 @@ void dev_add_offload(struct packet_offload *po)
struct packet_offload *elem;
spin_lock(&offload_lock);
- list_for_each_entry(elem, &offload_base, list) {
+ list_for_each_entry(elem, &net_hotdata.offload_base, list) {
if (po->priority < elem->priority)
break;
}
@@ -55,7 +53,7 @@ EXPORT_SYMBOL(dev_add_offload);
*/
static void __dev_remove_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *po1;
spin_lock(&offload_lock);
@@ -233,12 +231,39 @@ done:
return 0;
}
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &offload_base;
int err = -ENOENT;
BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -333,8 +358,6 @@ static void gro_list_prepare(const struct list_head *head,
list_for_each_entry(p, head, list) {
unsigned long diffs;
- NAPI_GRO_CB(p)->flush = 0;
-
if (hash != skb_get_hash_raw(p)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -370,15 +393,22 @@ static void gro_list_prepare(const struct list_head *head,
static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
- const struct skb_shared_info *pinfo = skb_shinfo(skb);
- const skb_frag_t *frag0 = &pinfo->frags[0];
+ const struct skb_shared_info *pinfo;
+ const skb_frag_t *frag0;
+ unsigned int headlen;
+ NAPI_GRO_CB(skb)->network_offset = 0;
NAPI_GRO_CB(skb)->data_offset = 0;
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ headlen = skb_headlen(skb);
+ NAPI_GRO_CB(skb)->frag0 = skb->data;
+ NAPI_GRO_CB(skb)->frag0_len = headlen;
+ if (headlen)
+ return;
- if (!skb_headlen(skb) && pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0)) &&
+ pinfo = skb_shinfo(skb);
+ frag0 = &pinfo->frags[0];
+
+ if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) &&
(!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
@@ -439,7 +469,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
{
u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
struct gro_list *gro_list = &napi->gro_hash[bucket];
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
struct sk_buff *pp = NULL;
@@ -467,7 +497,6 @@ found_ptype:
sizeof(u32))); /* Avoid slow unaligned acc */
*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
@@ -545,7 +574,7 @@ normal:
struct packet_offload *gro_find_receive_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
@@ -559,7 +588,7 @@ EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
@@ -701,7 +730,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
skb_reset_mac_header(skb);
skb_gro_reset_offset(skb, hlen);
- if (unlikely(skb_gro_header_hard(skb, hlen))) {
+ if (unlikely(!skb_gro_may_pull(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
if (unlikely(!eth)) {
net_warn_ratelimited("%s: dropping impossible skb from %s\n",
@@ -711,7 +740,10 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
}
} else {
eth = (const struct ethhdr *)skb->data;
- gro_pull_from_frag0(skb, hlen);
+
+ if (NAPI_GRO_CB(skb)->frag0 != skb->data)
+ gro_pull_from_frag0(skb, hlen);
+
NAPI_GRO_CB(skb)->frag0 += hlen;
NAPI_GRO_CB(skb)->frag0_len -= hlen;
}
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index ed5ec5de47..ff8e5b64bf 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <net/gro_cells.h>
+#include <net/hotdata.h>
struct gro_cell {
struct sk_buff_head napi_skbs;
@@ -26,7 +27,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/gso.c b/net/core/gso.c
index 9e1803bfc9..bcd156372f 100644
--- a/net/core/gso.c
+++ b/net/core/gso.c
@@ -17,7 +17,7 @@ struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
struct packet_offload *ptype;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
@@ -48,7 +48,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
__skb_pull(skb, vlan_depth);
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
diff --git a/net/core/gso_test.c b/net/core/gso_test.c
deleted file mode 100644
index 358c44680d..0000000000
--- a/net/core/gso_test.c
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <kunit/test.h>
-#include <linux/skbuff.h>
-
-static const char hdr[] = "abcdefgh";
-#define GSO_TEST_SIZE 1000
-
-static void __init_skb(struct sk_buff *skb)
-{
- skb_reset_mac_header(skb);
- memcpy(skb_mac_header(skb), hdr, sizeof(hdr));
-
- /* skb_segment expects skb->data at start of payload */
- skb_pull(skb, sizeof(hdr));
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
-
- /* proto is arbitrary, as long as not ETH_P_TEB or vlan */
- skb->protocol = htons(ETH_P_ATALK);
- skb_shinfo(skb)->gso_size = GSO_TEST_SIZE;
-}
-
-enum gso_test_nr {
- GSO_TEST_LINEAR,
- GSO_TEST_NO_GSO,
- GSO_TEST_FRAGS,
- GSO_TEST_FRAGS_PURE,
- GSO_TEST_GSO_PARTIAL,
- GSO_TEST_FRAG_LIST,
- GSO_TEST_FRAG_LIST_PURE,
- GSO_TEST_FRAG_LIST_NON_UNIFORM,
- GSO_TEST_GSO_BY_FRAGS,
-};
-
-struct gso_test_case {
- enum gso_test_nr id;
- const char *name;
-
- /* input */
- unsigned int linear_len;
- unsigned int nr_frags;
- const unsigned int *frags;
- unsigned int nr_frag_skbs;
- const unsigned int *frag_skbs;
-
- /* output as expected */
- unsigned int nr_segs;
- const unsigned int *segs;
-};
-
-static struct gso_test_case cases[] = {
- {
- .id = GSO_TEST_NO_GSO,
- .name = "no_gso",
- .linear_len = GSO_TEST_SIZE,
- .nr_segs = 1,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE },
- },
- {
- .id = GSO_TEST_LINEAR,
- .name = "linear",
- .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1,
- .nr_segs = 3,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
- },
- {
- .id = GSO_TEST_FRAGS,
- .name = "frags",
- .linear_len = GSO_TEST_SIZE,
- .nr_frags = 2,
- .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 },
- .nr_segs = 3,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
- },
- {
- .id = GSO_TEST_FRAGS_PURE,
- .name = "frags_pure",
- .nr_frags = 3,
- .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
- .nr_segs = 3,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
- },
- {
- .id = GSO_TEST_GSO_PARTIAL,
- .name = "gso_partial",
- .linear_len = GSO_TEST_SIZE,
- .nr_frags = 2,
- .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 },
- .nr_segs = 2,
- .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 },
- },
- {
- /* commit 89319d3801d1: frag_list on mss boundaries */
- .id = GSO_TEST_FRAG_LIST,
- .name = "frag_list",
- .linear_len = GSO_TEST_SIZE,
- .nr_frag_skbs = 2,
- .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
- .nr_segs = 3,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE },
- },
- {
- .id = GSO_TEST_FRAG_LIST_PURE,
- .name = "frag_list_pure",
- .nr_frag_skbs = 2,
- .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
- .nr_segs = 2,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
- },
- {
- /* commit 43170c4e0ba7: GRO of frag_list trains */
- .id = GSO_TEST_FRAG_LIST_NON_UNIFORM,
- .name = "frag_list_non_uniform",
- .linear_len = GSO_TEST_SIZE,
- .nr_frag_skbs = 4,
- .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 },
- .nr_segs = 4,
- .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 },
- },
- {
- /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and
- * commit 90017accff61 ("sctp: Add GSO support")
- *
- * "there will be a cover skb with protocol headers and
- * children ones containing the actual segments"
- */
- .id = GSO_TEST_GSO_BY_FRAGS,
- .name = "gso_by_frags",
- .nr_frag_skbs = 4,
- .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 },
- .nr_segs = 4,
- .segs = (const unsigned int[]) { 100, 200, 300, 400 },
- },
-};
-
-static void gso_test_case_to_desc(struct gso_test_case *t, char *desc)
-{
- sprintf(desc, "%s", t->name);
-}
-
-KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc);
-
-static void gso_test_func(struct kunit *test)
-{
- const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- struct sk_buff *skb, *segs, *cur, *next, *last;
- const struct gso_test_case *tcase;
- netdev_features_t features;
- struct page *page;
- int i;
-
- tcase = test->param_value;
-
- page = alloc_page(GFP_KERNEL);
- KUNIT_ASSERT_NOT_NULL(test, page);
- skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size);
- KUNIT_ASSERT_NOT_NULL(test, skb);
- __skb_put(skb, sizeof(hdr) + tcase->linear_len);
-
- __init_skb(skb);
-
- if (tcase->nr_frags) {
- unsigned int pg_off = 0;
-
- page = alloc_page(GFP_KERNEL);
- KUNIT_ASSERT_NOT_NULL(test, page);
- page_ref_add(page, tcase->nr_frags - 1);
-
- for (i = 0; i < tcase->nr_frags; i++) {
- skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]);
- pg_off += tcase->frags[i];
- }
-
- KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE);
-
- skb->data_len = pg_off;
- skb->len += skb->data_len;
- skb->truesize += skb->data_len;
- }
-
- if (tcase->frag_skbs) {
- unsigned int total_size = 0, total_true_size = 0;
- struct sk_buff *frag_skb, *prev = NULL;
-
- for (i = 0; i < tcase->nr_frag_skbs; i++) {
- unsigned int frag_size;
-
- page = alloc_page(GFP_KERNEL);
- KUNIT_ASSERT_NOT_NULL(test, page);
-
- frag_size = tcase->frag_skbs[i];
- frag_skb = build_skb(page_address(page),
- frag_size + shinfo_size);
- KUNIT_ASSERT_NOT_NULL(test, frag_skb);
- __skb_put(frag_skb, frag_size);
-
- if (prev)
- prev->next = frag_skb;
- else
- skb_shinfo(skb)->frag_list = frag_skb;
- prev = frag_skb;
-
- total_size += frag_size;
- total_true_size += frag_skb->truesize;
- }
-
- skb->len += total_size;
- skb->data_len += total_size;
- skb->truesize += total_true_size;
-
- if (tcase->id == GSO_TEST_GSO_BY_FRAGS)
- skb_shinfo(skb)->gso_size = GSO_BY_FRAGS;
- }
-
- features = NETIF_F_SG | NETIF_F_HW_CSUM;
- if (tcase->id == GSO_TEST_GSO_PARTIAL)
- features |= NETIF_F_GSO_PARTIAL;
-
- /* TODO: this should also work with SG,
- * rather than hit BUG_ON(i >= nfrags)
- */
- if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM)
- features &= ~NETIF_F_SG;
-
- segs = skb_segment(skb, features);
- if (IS_ERR(segs)) {
- KUNIT_FAIL(test, "segs error %pe", segs);
- goto free_gso_skb;
- } else if (!segs) {
- KUNIT_FAIL(test, "no segments");
- goto free_gso_skb;
- }
-
- last = segs->prev;
- for (cur = segs, i = 0; cur; cur = next, i++) {
- next = cur->next;
-
- KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]);
-
- /* segs have skb->data pointing to the mac header */
- KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data);
- KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr));
-
- /* header was copied to all segs */
- KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0);
-
- /* last seg can be found through segs->prev pointer */
- if (!next)
- KUNIT_ASSERT_PTR_EQ(test, cur, last);
-
- consume_skb(cur);
- }
-
- KUNIT_ASSERT_EQ(test, i, tcase->nr_segs);
-
-free_gso_skb:
- consume_skb(skb);
-}
-
-static struct kunit_case gso_test_cases[] = {
- KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params),
- {}
-};
-
-static struct kunit_suite gso_test_suite = {
- .name = "net_core_gso",
- .test_cases = gso_test_cases,
-};
-
-kunit_test_suite(gso_test_suite);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("KUnit tests for segmentation offload");
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
new file mode 100644
index 0000000000..d0aaaaa556
--- /dev/null
+++ b/net/core/hotdata.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
+/*
+ * The one exported net_hotdata instance, statically initialised with the
+ * defaults listed below. Both list heads start out empty: LIST_HEAD_INIT
+ * makes each of them point back at itself.
+ */
+struct net_hotdata net_hotdata __cacheline_aligned = {
+ .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
+ .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
+ .gro_normal_batch = 8, /* maximum number of GRO_NORMAL skbs to batch up for list-RX */
+
+ .netdev_budget = 300,
+ /* Must be at least 2 jiffies to guarantee 1 jiffy timeout */
+ .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ,
+
+ .tstamp_prequeue = 1,
+ .max_backlog = 1000,
+ .dev_tx_weight = 64,
+ .dev_rx_weight = 64,
+ .sysctl_max_skb_frags = MAX_SKB_FRAGS,
+ .sysctl_skb_defer_max = 64,
+ .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
+};
+EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
new file mode 100644
index 0000000000..759a9b9f3f
--- /dev/null
+++ b/net/core/ieee8021q_helpers.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/array_size.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <net/dscp.h>
+#include <net/ieee8021q.h>
+
+/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
+ * different numbers of queues as shown in the example provided by
+ * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
+ * Table I-1 "Traffic type to traffic class mapping".
+ *
+ * Each array is indexed by traffic type and yields the traffic class for it.
+ * With 8 queues every traffic type gets its own class; with fewer queues,
+ * neighbouring traffic types share a class as noted above each table.
+ */
+static const u8 ieee8021q_8queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4,
+ [IEEE8021Q_TT_VO] = 5,
+ [IEEE8021Q_TT_IC] = 6,
+ [IEEE8021Q_TT_NC] = 7,
+};
+/* 7 queues: VI and VO share a class. */
+static const u8 ieee8021q_7queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4, [IEEE8021Q_TT_VO] = 4,
+ [IEEE8021Q_TT_IC] = 5,
+ [IEEE8021Q_TT_NC] = 6,
+};
+/* 6 queues: EE/CA share a class, and so do VI/VO. */
+static const u8 ieee8021q_6queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2, [IEEE8021Q_TT_CA] = 2,
+ [IEEE8021Q_TT_VI] = 3, [IEEE8021Q_TT_VO] = 3,
+ [IEEE8021Q_TT_IC] = 4,
+ [IEEE8021Q_TT_NC] = 5,
+};
+/* 5 queues: BK/BE, EE/CA and VI/VO each share a class. */
+static const u8 ieee8021q_5queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3,
+ [IEEE8021Q_TT_NC] = 4,
+};
+/* 4 queues: BK/BE, EE/CA, VI/VO and IC/NC each share a class. */
+static const u8 ieee8021q_4queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3, [IEEE8021Q_TT_NC] = 3,
+};
+/* 3 queues: BK/BE/EE/CA share class 0, VI/VO class 1, IC/NC class 2. */
+static const u8 ieee8021q_3queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 2, [IEEE8021Q_TT_NC] = 2,
+};
+/* 2 queues: BK/BE/EE/CA share class 0, VI/VO/IC/NC class 1. */
+static const u8 ieee8021q_2queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 1, [IEEE8021Q_TT_NC] = 1,
+};
+/* 1 queue: every traffic type maps to class 0. */
+static const u8 ieee8021q_1queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 0, [IEEE8021Q_TT_VO] = 0,
+ [IEEE8021Q_TT_IC] = 0, [IEEE8021Q_TT_NC] = 0,
+};
+
+/**
+ * ieee8021q_tt_to_tc - Map IEEE 802.1Q Traffic Type to Traffic Class
+ * @tt: IEEE 802.1Q Traffic Type
+ * @num_queues: Number of queues
+ *
+ * This function maps an IEEE 802.1Q Traffic Type to a Traffic Class (TC) based
+ * on the number of queues configured on the NIC. The mapping is based on the
+ * example provided by IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic
+ * class mapping" and Table I-1 "Traffic type to traffic class mapping".
+ *
+ * Only 1 to 8 queues are supported; any other @num_queues is rejected.
+ *
+ * Return: Traffic Class corresponding to the given Traffic Type, or -EINVAL
+ * if @tt or @num_queues is out of range.
+ */
+int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
+{
+	if (tt < 0 || tt >= IEEE8021Q_TT_MAX) {
+		pr_err("Requested Traffic Type (%d) is out of range (%d)\n", tt,
+		       IEEE8021Q_TT_MAX);
+		return -EINVAL;
+	}
+
+	/*
+	 * Every map is indexed directly by @tt, which was just checked to be
+	 * below IEEE8021Q_TT_MAX, so each map must hold exactly
+	 * IEEE8021Q_TT_MAX entries. The build fails if one does not.
+	 */
+	switch (num_queues) {
+	case 8:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_8queue_tt_tc_map != max");
+		return ieee8021q_8queue_tt_tc_map[tt];
+	case 7:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_7queue_tt_tc_map != max");
+
+		return ieee8021q_7queue_tt_tc_map[tt];
+	case 6:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_6queue_tt_tc_map != max");
+
+		return ieee8021q_6queue_tt_tc_map[tt];
+	case 5:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_5queue_tt_tc_map != max");
+
+		return ieee8021q_5queue_tt_tc_map[tt];
+	case 4:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_4queue_tt_tc_map != max");
+
+		return ieee8021q_4queue_tt_tc_map[tt];
+	case 3:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_3queue_tt_tc_map != max");
+
+		return ieee8021q_3queue_tt_tc_map[tt];
+	case 2:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_2queue_tt_tc_map != max");
+
+		return ieee8021q_2queue_tt_tc_map[tt];
+	case 1:
+		compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) ==
+				   IEEE8021Q_TT_MAX,
+				   "ieee8021q_1queue_tt_tc_map != max");
+
+		return ieee8021q_1queue_tt_tc_map[tt];
+	}
+
+	/* num_queues is unsigned, so print it with %u */
+	pr_err("Invalid number of queues %u\n", num_queues);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(ieee8021q_tt_to_tc);
+
+/**
+ * ietf_dscp_to_ieee8021q_tt - Map IETF DSCP to IEEE 802.1Q Traffic Type
+ * @dscp: IETF DSCP value
+ *
+ * This function maps an IETF DSCP value to an IEEE 802.1Q Traffic Type (TT).
+ * Since there is no direct mapping between DSCP and IEEE 802.1Q Traffic
+ * Type, this function is inspired by RFC8325, which describes
+ * the mapping between DSCP and 802.11 User Priority (UP) values.
+ *
+ * Return: IEEE 802.1Q Traffic Type corresponding to the given DSCP value.
+ * DSCP values without an explicit case below are passed to
+ * SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT() and its result is returned.
+ */
+int ietf_dscp_to_ieee8021q_tt(u8 dscp)
+{
+ switch (dscp) {
+ case DSCP_CS0:
+ /* Comment from RFC8325:
+ * [RFC4594], Section 4.8, recommends High-Throughput Data be marked
+ * AF1x (that is, AF11, AF12, and AF13, according to the rules defined
+ * in [RFC2475]).
+ *
+ * By default (as described in Section 2.3), High-Throughput Data will
+ * map to UP 1 and, thus, to the Background Access Category (AC_BK),
+ * which is contrary to the intent expressed in [RFC4594].
+ *
+ * Unfortunately, there really is no corresponding fit for the High-
+ * Throughput Data service class within the constrained 4 Access
+ * Category [IEEE.802.11-2016] model. If the High-Throughput Data
+ * service class is assigned to the Best Effort Access Category (AC_BE),
+ * then it would contend with Low-Latency Data (while [RFC4594]
+ * recommends a distinction in servicing between these service classes)
+ * as well as with the default service class; alternatively, if it is
+ * assigned to the Background Access Category (AC_BK), then it would
+ * receive a less-than-best-effort service and contend with Low-Priority
+ * Data (as discussed in Section 4.2.10).
+ *
+ * As such, since there is no directly corresponding fit for the High-
+ * Throughput Data service class within the [IEEE.802.11-2016] model, it
+ * is generally RECOMMENDED to map High-Throughput Data to UP 0, thereby
+ * admitting it to the Best Effort Access Category (AC_BE).
+ *
+ * Note: The above text is from RFC8325 which is describing the mapping
+ * between DSCP and 802.11 User Priority (UP) values. The mapping
+ * between UP and IEEE 802.1Q Traffic Type is not defined in the RFC but
+ * the 802.11 AC_BK and AC_BE are closely related to the IEEE 802.1Q
+ * Traffic Types BK and BE.
+ *
+ * Hence CS0 and AF11/AF12/AF13 all fall through to Best Effort here.
+ */
+ case DSCP_AF11:
+ case DSCP_AF12:
+ case DSCP_AF13:
+ return IEEE8021Q_TT_BE;
+ /* Comment from RFC8325:
+ * RFC3662 and RFC4594 both recommend Low-Priority Data be marked
+ * with DSCP CS1. The Low-Priority Data service class loosely
+ * corresponds to the [IEEE.802.11-2016] Background Access Category
+ */
+ case DSCP_CS1:
+ return IEEE8021Q_TT_BK;
+ case DSCP_CS2:
+ case DSCP_AF21:
+ case DSCP_AF22:
+ case DSCP_AF23:
+ return IEEE8021Q_TT_EE;
+ case DSCP_CS3:
+ case DSCP_AF31:
+ case DSCP_AF32:
+ case DSCP_AF33:
+ return IEEE8021Q_TT_CA;
+ case DSCP_CS4:
+ case DSCP_AF41:
+ case DSCP_AF42:
+ case DSCP_AF43:
+ return IEEE8021Q_TT_VI;
+ case DSCP_CS5:
+ case DSCP_EF:
+ case DSCP_VOICE_ADMIT:
+ return IEEE8021Q_TT_VO;
+ case DSCP_CS6:
+ return IEEE8021Q_TT_IC;
+ case DSCP_CS7:
+ return IEEE8021Q_TT_NC;
+ }
+ /* No explicit case matched: defer to the generic fallback macro (defined outside this file). */
+ return SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp);
+}
+EXPORT_SYMBOL_GPL(ietf_dscp_to_ieee8021q_tt);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 429571c258..ab15064114 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -33,7 +33,7 @@ static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
-static unsigned char default_operstate(const struct net_device *dev)
+static unsigned int default_operstate(const struct net_device *dev)
{
if (netif_testing(dev))
return IF_OPER_TESTING;
@@ -62,16 +62,13 @@ static unsigned char default_operstate(const struct net_device *dev)
return IF_OPER_UP;
}
-
static void rfc2863_policy(struct net_device *dev)
{
- unsigned char operstate = default_operstate(dev);
+ unsigned int operstate = default_operstate(dev);
- if (operstate == dev->operstate)
+ if (operstate == READ_ONCE(dev->operstate))
return;
- write_lock(&dev_base_lock);
-
switch(dev->link_mode) {
case IF_LINK_MODE_TESTING:
if (operstate == IF_OPER_UP)
@@ -87,9 +84,7 @@ static void rfc2863_policy(struct net_device *dev)
break;
}
- dev->operstate = operstate;
-
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->operstate, operstate);
}
@@ -153,9 +148,9 @@ static void linkwatch_schedule_work(int urgent)
* override the existing timer.
*/
if (test_bit(LW_URGENT, &linkwatch_flags))
- mod_delayed_work(system_wq, &linkwatch_work, 0);
+ mod_delayed_work(system_unbound_wq, &linkwatch_work, 0);
else
- schedule_delayed_work(&linkwatch_work, delay);
+ queue_delayed_work(system_unbound_wq, &linkwatch_work, delay);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 552719c3bb..45fd88405b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -734,7 +734,9 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
+ bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
+
+ return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -1769,7 +1771,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms)
static struct lock_class_key neigh_table_proxy_queue_class;
-static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
+static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
@@ -1826,13 +1828,19 @@ void neigh_table_init(int index, struct neigh_table *tbl)
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
- neigh_tables[index] = tbl;
+ rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
+/*
+ * Only called from ndisc_cleanup(), which means this is dead code
+ * because we can no longer unload the IPv6 module.
+ */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
- neigh_tables[index] = NULL;
+ RCU_INIT_POINTER(neigh_tables[index], NULL);
+ synchronize_rcu();
+
/* It is not clean... Fix it to unload IPv6 module safely */
cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
@@ -1864,10 +1872,10 @@ static struct neigh_table *neigh_find_table(int family)
switch (family) {
case AF_INET:
- tbl = neigh_tables[NEIGH_ARP_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
break;
case AF_INET6:
- tbl = neigh_tables[NEIGH_ND_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
break;
}
@@ -2331,7 +2339,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
ndtmsg = nlmsg_data(nlh);
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
if (!tbl)
continue;
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
@@ -2519,7 +2527,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
if (!tbl)
continue;
@@ -2674,7 +2682,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
if (!master_idx)
return false;
- master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+ master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
* invalid value for ifindex to denote "no master".
@@ -2707,7 +2715,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct net *net = sock_net(skb->sk);
struct neighbour *n;
- int rc, h, s_h = cb->args[1];
+ int err = 0, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
unsigned int flags = NLM_F_MULTI;
@@ -2715,7 +2723,6 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- rcu_read_lock();
nht = rcu_dereference(tbl->nht);
for (h = s_h; h < (1 << nht->hash_shift); h++) {
@@ -2729,23 +2736,19 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- flags) < 0) {
- rc = -1;
+ err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- rc = skb->len;
out:
- rcu_read_unlock();
cb->args[1] = h;
cb->args[2] = idx;
- return rc;
+ return err;
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
@@ -2754,7 +2757,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct pneigh_entry *n;
struct net *net = sock_net(skb->sk);
- int rc, h, s_h = cb->args[3];
+ int err = 0, h, s_h = cb->args[3];
int idx, s_idx = idx = cb->args[4];
unsigned int flags = NLM_F_MULTI;
@@ -2772,11 +2775,11 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH, flags, tbl) < 0) {
+ err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags, tbl);
+ if (err < 0) {
read_unlock_bh(&tbl->lock);
- rc = -1;
goto out;
}
next:
@@ -2785,12 +2788,10 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
}
read_unlock_bh(&tbl->lock);
- rc = skb->len;
out:
cb->args[3] = h;
cb->args[4] = idx;
- return rc;
-
+ return err;
}
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
@@ -2878,8 +2879,9 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
+ rcu_read_lock();
for (t = 0; t < NEIGH_NR_TABLES; t++) {
- tbl = neigh_tables[t];
+ tbl = rcu_dereference(neigh_tables[t]);
if (!tbl)
continue;
@@ -2895,9 +2897,10 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
break;
}
+ rcu_read_unlock();
cb->args[0] = t;
- return skb->len;
+ return err;
}
static int neigh_valid_get_req(const struct nlmsghdr *nlh,
@@ -3143,14 +3146,15 @@ int neigh_xmit(int index, struct net_device *dev,
const void *addr, struct sk_buff *skb)
{
int err = -EAFNOSUPPORT;
+
if (likely(index < NEIGH_NR_TABLES)) {
struct neigh_table *tbl;
struct neighbour *neigh;
- tbl = neigh_tables[index];
- if (!tbl)
- goto out;
rcu_read_lock();
+ tbl = rcu_dereference(neigh_tables[index]);
+ if (!tbl)
+ goto out_unlock;
if (index == NEIGH_ARP_TABLE) {
u32 key = *((u32 *)addr);
@@ -3166,6 +3170,7 @@ int neigh_xmit(int index, struct net_device *dev,
goto out_kfree_skb;
}
err = READ_ONCE(neigh->output)(neigh, skb);
+out_unlock:
rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
@@ -3728,7 +3733,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
@@ -3779,7 +3784,6 @@ static struct neigh_sysctl_table {
.extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
- {},
},
};
@@ -3807,8 +3811,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
- sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
struct neigh_table *tbl = p->tbl;
@@ -3889,7 +3891,8 @@ static int __init neigh_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info,
+ RTNL_FLAG_DUMP_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
0);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 09f7ed1a04..fa6d396973 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -3,52 +3,22 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/wext.h>
+#include <net/hotdata.h>
#include "dev.h"
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
-
-#define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
-#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos)
{
- struct net *net = seq_file_net(seq);
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct hlist_head *h;
- unsigned int count = 0, offset = get_offset(*pos);
- h = &net->dev_index_head[get_bucket(*pos)];
- hlist_for_each_entry_rcu(dev, h, index_hlist) {
- if (++count == offset)
- return dev;
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
+ *pos = dev->ifindex;
+ return dev;
}
-
- return NULL;
-}
-
-static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
-{
- struct net_device *dev;
- unsigned int bucket;
-
- do {
- dev = dev_from_same_bucket(seq, pos);
- if (dev)
- return dev;
-
- bucket = get_bucket(*pos) + 1;
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < NETDEV_HASHENTRIES);
-
return NULL;
}
-/*
- * This is invoked by the /proc filesystem handler to display a device
- * in detail.
- */
static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
@@ -56,16 +26,13 @@ static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
- return NULL;
-
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void dev_seq_stop(struct seq_file *seq, void *v)
@@ -177,7 +144,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x\n",
- sd->processed, sd->dropped, sd->time_squeeze, 0,
+ sd->processed, atomic_read(&sd->dropped),
+ sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
sd->received_rps, flow_limit_count,
@@ -217,7 +185,7 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
}
}
- list_for_each_entry_rcu(pt, &ptype_all, list) {
+ list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
if (i == pos)
return pt;
++i;
@@ -265,13 +233,13 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
}
- nxt = ptype_all.next;
+ nxt = net_hotdata.ptype_all.next;
goto ptype_all;
}
if (pt->type == htons(ETH_P_ALL)) {
ptype_all:
- if (nxt != &ptype_all)
+ if (nxt != &net_hotdata.ptype_all)
goto found;
hash = 0;
nxt = ptype_base[0].next;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b..4c27a360c2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -34,10 +35,10 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
-/* Caller holds RTNL or dev_base_lock */
+/* Caller holds RTNL or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
- return dev->reg_state <= NETREG_REGISTERED;
+ return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */
@@ -48,10 +49,10 @@ static ssize_t netdev_show(const struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = (*format)(ndev, buf);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
@@ -60,7 +61,7 @@ static ssize_t netdev_show(const struct device *dev,
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *dev, char *buf) \
{ \
- return sysfs_emit(buf, format_string, dev->field); \
+ return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \
} \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -125,7 +126,7 @@ static DEVICE_ATTR_RO(iflink);
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
- return sysfs_emit(buf, fmt_dec, dev->name_assign_type);
+ return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}
static ssize_t name_assign_type_show(struct device *dev,
@@ -135,24 +136,28 @@ static ssize_t name_assign_type_show(struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (ndev->name_assign_type != NET_NAME_UNKNOWN)
+ if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
ret = netdev_show(dev, attr, buf, format_name_assign_type);
return ret;
}
static DEVICE_ATTR_RO(name_assign_type);
-/* use same locking rules as GIFHWADDR ioctl's */
+/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ down_read(&dev_addr_sem);
+
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
+
+ up_read(&dev_addr_sem);
return ret;
}
static DEVICE_ATTR_RO(address);
@@ -161,10 +166,13 @@ static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *ndev = to_net_dev(dev);
+ int ret = -EINVAL;
+ rcu_read_lock();
if (dev_isalive(ndev))
- return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
- return -EINVAL;
+ ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
+ rcu_read_unlock();
+ return ret;
}
static DEVICE_ATTR_RO(broadcast);
@@ -318,11 +326,9 @@ static ssize_t operstate_show(struct device *dev,
const struct net_device *netdev = to_net_dev(dev);
unsigned char operstate;
- read_lock(&dev_base_lock);
- operstate = netdev->operstate;
+ operstate = READ_ONCE(netdev->operstate);
if (!netif_running(netdev))
operstate = IF_OPER_DOWN;
- read_unlock(&dev_base_lock);
if (operstate >= ARRAY_SIZE(operstates))
return -EINVAL; /* should not happen */
@@ -599,13 +605,13 @@ static ssize_t threaded_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ rcu_read_lock();
if (dev_isalive(netdev))
- ret = sysfs_emit(buf, fmt_dec, netdev->threaded);
+ ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));
+
+ rcu_read_unlock();
- rtnl_unlock();
return ret;
}
@@ -680,14 +686,14 @@ static ssize_t netstat_show(const struct device *d,
WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
offset % sizeof(u64) != 0);
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(dev)) {
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
@@ -1409,6 +1415,65 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
+/*
+ * Show the stall_thrs value of @queue's dql.  The field holds jiffies and
+ * is converted to milliseconds for output.
+ */
+static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	/* Read without locking; bql_set_stall_thrs() may store concurrently. */
+	return sysfs_emit(buf, "%u\n",
+			  jiffies_to_msecs(READ_ONCE(dql->stall_thrs)));
+}
+
+/*
+ * Parse a decimal millisecond value from @buf and store it, converted to
+ * jiffies, as the stall_thrs of @queue's dql.
+ *
+ * A converted value of 0 is always accepted; any other value must lie
+ * between 4 and 4 / 2 * BITS_PER_LONG jiffies, otherwise -ERANGE is
+ * returned.  Returns @len on success, or the kstrtouint() error when @buf
+ * does not parse.
+ */
+static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
+				  const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+	unsigned int value;
+	int err;
+
+	err = kstrtouint(buf, 10, &value);
+	if (err < 0)
+		return err;
+
+	value = msecs_to_jiffies(value);
+	if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
+		return -ERANGE;
+
+	/* Going from zero to non-zero: record the current jiffies first. */
+	if (!READ_ONCE(dql->stall_thrs) && value)
+		dql->last_reap = jiffies;
+	/* Force last_reap to be live */
+	smp_wmb();
+	/* stall_thrs is read without locking, e.g. by bql_show_stall_thrs(). */
+	WRITE_ONCE(dql->stall_thrs, value);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
+ __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
+
+/* Report the stall_max value currently held in @queue's dql. */
+static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+{
+	const struct dql *dql = &queue->dql;
+
+	return sysfs_emit(buf, "%u\n", READ_ONCE(dql->stall_max));
+}
+
+/*
+ * Any write to the attribute resets stall_max to zero; the contents of
+ * @buf are not examined.  Always reports @len bytes as consumed.
+ */
+static ssize_t bql_set_stall_max(struct netdev_queue *queue,
+				 const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+
+	WRITE_ONCE(dql->stall_max, 0);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
+ __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
+
+/* Report the stall_cnt counter of @queue's dql. */
+static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	/* Read without locking, annotated like bql_show_stall_max(). */
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(dql->stall_cnt));
+}
+
+static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
+ __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
+
static ssize_t bql_show_inflight(struct netdev_queue *queue,
char *buf)
{
@@ -1447,6 +1512,9 @@ static struct attribute *dql_attrs[] __ro_after_init = {
&bql_limit_min_attribute.attr,
&bql_hold_time_attribute.attr,
&bql_inflight_attribute.attr,
+ &bql_stall_thrs_attribute.attr,
+ &bql_stall_cnt_attribute.attr,
+ &bql_stall_max_attribute.attr,
NULL
};
@@ -1454,6 +1522,9 @@ static const struct attribute_group dql_group = {
.name = "byte_queue_limits",
.attrs = dql_attrs,
};
+#else
+/* Fake declaration, all the code using it should be dead */
+extern const struct attribute_group dql_group;
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
@@ -1691,6 +1762,15 @@ static const struct kobj_type netdev_queue_ktype = {
.get_ownership = netdev_queue_get_ownership,
};
+/*
+ * Tell whether the byte_queue_limits sysfs group applies to @dev: never
+ * when CONFIG_BQL is disabled, and not for devices that have
+ * NETIF_F_LLTX or IFF_NO_QUEUE set.
+ */
+static bool netdev_uses_bql(const struct net_device *dev)
+{
+	if (!IS_ENABLED(CONFIG_BQL))
+		return false;
+
+	return !(dev->features & NETIF_F_LLTX) &&
+	       !(dev->priv_flags & IFF_NO_QUEUE);
+}
+
static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
struct netdev_queue *queue = dev->_tx + index;
@@ -1708,11 +1788,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
-#ifdef CONFIG_BQL
- error = sysfs_create_group(kobj, &dql_group);
- if (error)
- goto err;
-#endif
+ if (netdev_uses_bql(dev)) {
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error)
+ goto err;
+ }
kobject_uevent(kobj, KOBJ_ADD);
return 0;
@@ -1733,9 +1813,9 @@ static int tx_queue_change_owner(struct net_device *ndev, int index,
if (error)
return error;
-#ifdef CONFIG_BQL
- error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
-#endif
+ if (netdev_uses_bql(ndev))
+ error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
+
return error;
}
#endif /* CONFIG_SYSFS */
@@ -1767,9 +1847,10 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (!refcount_read(&dev_net(dev)->ns.count))
queue->kobj.uevent_suppress = 1;
-#ifdef CONFIG_BQL
- sysfs_remove_group(&queue->kobj, &dql_group);
-#endif
+
+ if (netdev_uses_bql(dev))
+ sysfs_remove_group(&queue->kobj, &dql_group);
+
kobject_put(&queue->kobj);
}
@@ -1965,7 +2046,7 @@ static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
net_ns_get_ownership(net, uid, gid);
}
-static struct class net_class __ro_after_init = {
+static const struct class net_class = {
.name = "net",
.dev_release = netdev_release,
.dev_groups = net_class_groups,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7279953342..6a823ba906 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie);
static struct net_generic *net_alloc_generic(void)
{
+ unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
+ unsigned int generic_size;
struct net_generic *ng;
- unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->s.len = max_gen_ptrs;
+ ng->s.len = gen_ptrs;
return ng;
}
@@ -318,8 +321,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
- int error = 0;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
+ int error = 0;
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -358,6 +362,15 @@ out_undo:
synchronize_rcu();
ops = saved_ops;
+ rtnl_lock();
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+
+ ops = saved_ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
@@ -573,6 +586,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
@@ -611,7 +625,15 @@ static void cleanup_net(struct work_struct *work)
* the rcu_barrier() below isn't sufficient alone.
* Also the pre_exit() and exit() methods need this barrier.
*/
- synchronize_rcu();
+ synchronize_rcu_expedited();
+
+ rtnl_lock();
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
/* Run all of the network namespace exit methods */
list_for_each_entry_reverse(ops, &pernet_list, list)
@@ -671,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net);
* get_net_ns - increment the refcount of the network namespace
* @ns: common namespace (net)
*
- * Returns the net's common namespace.
+ * Returns the net's common namespace or ERR_PTR() if ref is zero.
*/
struct ns_common *get_net_ns(struct ns_common *ns)
{
- return &get_net(container_of(ns, struct net, ns))->ns;
+ struct net *net;
+
+ net = maybe_get_net(container_of(ns, struct net, ns));
+ if (net)
+ return &net->ns;
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns);
@@ -1071,7 +1098,7 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
end:
if (net_cb.fillargs.add_ref)
put_net(net_cb.tgt_net);
- return err < 0 ? err : skb->len;
+ return err;
}
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
@@ -1186,14 +1213,25 @@ void __init net_ns_init(void)
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
}
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
ops_pre_exit_list(ops, net_exit_list);
synchronize_rcu();
+
+ if (ops->exit_batch_rtnl) {
+ LIST_HEAD(dev_kill_list);
+
+ rtnl_lock();
+ ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+ }
ops_exit_list(ops, net_exit_list);
+
ops_free_list(ops, net_exit_list);
}
@@ -1278,7 +1316,11 @@ static int register_pernet_operations(struct list_head *list,
if (error < 0)
return error;
*ops->id = error;
- max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
+ /* This does not require READ_ONCE as writers already hold
+ * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
+ * net_alloc_generic.
+ */
+ WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/net_test.c b/net/core/net_test.c
new file mode 100644
index 0000000000..9c3a590865
--- /dev/null
+++ b/net/core/net_test.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <kunit/test.h>
+
+/* GSO */
+
+#include <linux/skbuff.h>
+
+static const char hdr[] = "abcdefgh";
+#define GSO_TEST_SIZE 1000
+
+/*
+ * Prepare @skb as input for skb_segment(): place a copy of hdr[] at the
+ * MAC header, pull it so that skb->data sits at the payload, and set the
+ * protocol and gso_size used by the tests.
+ */
+static void __init_skb(struct sk_buff *skb)
+{
+ skb_reset_mac_header(skb);
+ memcpy(skb_mac_header(skb), hdr, sizeof(hdr));
+
+ /* skb_segment expects skb->data at start of payload */
+ skb_pull(skb, sizeof(hdr));
+ /* network and transport headers are both set at the payload start */
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+
+ /* proto is arbitrary, as long as not ETH_P_TEB or vlan */
+ skb->protocol = htons(ETH_P_ATALK);
+ skb_shinfo(skb)->gso_size = GSO_TEST_SIZE;
+}
+
+/*
+ * Identifier of each GSO test case.  gso_test_func() checks the id to
+ * apply case-specific setup (GSO_BY_FRAGS gso_size, GSO_PARTIAL feature,
+ * clearing SG for the non-uniform frag_list case).
+ */
+enum gso_test_nr {
+ GSO_TEST_LINEAR,
+ GSO_TEST_NO_GSO,
+ GSO_TEST_FRAGS,
+ GSO_TEST_FRAGS_PURE,
+ GSO_TEST_GSO_PARTIAL,
+ GSO_TEST_FRAG_LIST,
+ GSO_TEST_FRAG_LIST_PURE,
+ GSO_TEST_FRAG_LIST_NON_UNIFORM,
+ GSO_TEST_GSO_BY_FRAGS,
+};
+
+/*
+ * One parameterised skb_segment() scenario: how the input skb is laid out
+ * and which segments are expected back.
+ */
+struct gso_test_case {
+ enum gso_test_nr id;
+ const char *name;
+
+ /* input */
+ /* payload bytes placed in the linear area, after the test header */
+ unsigned int linear_len;
+ /* number and sizes of page frags attached to the head skb */
+ unsigned int nr_frags;
+ const unsigned int *frags;
+ /* number and sizes of skbs chained on the head skb's frag_list */
+ unsigned int nr_frag_skbs;
+ const unsigned int *frag_skbs;
+
+ /* output as expected */
+ /* number of segments and payload length of each (header excluded) */
+ unsigned int nr_segs;
+ const unsigned int *segs;
+};
+
+static struct gso_test_case cases[] = {
+ {
+ .id = GSO_TEST_NO_GSO,
+ .name = "no_gso",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_segs = 1,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE },
+ },
+ {
+ .id = GSO_TEST_LINEAR,
+ .name = "linear",
+ .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1,
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
+ },
+ {
+ .id = GSO_TEST_FRAGS,
+ .name = "frags",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frags = 2,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
+ },
+ {
+ .id = GSO_TEST_FRAGS_PURE,
+ .name = "frags_pure",
+ .nr_frags = 3,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
+ },
+ {
+ .id = GSO_TEST_GSO_PARTIAL,
+ .name = "gso_partial",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frags = 2,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 },
+ .nr_segs = 2,
+ .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 },
+ },
+ {
+ /* commit 89319d3801d1: frag_list on mss boundaries */
+ .id = GSO_TEST_FRAG_LIST,
+ .name = "frag_list",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frag_skbs = 2,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE },
+ },
+ {
+ .id = GSO_TEST_FRAG_LIST_PURE,
+ .name = "frag_list_pure",
+ .nr_frag_skbs = 2,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ .nr_segs = 2,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ },
+ {
+ /* commit 43170c4e0ba7: GRO of frag_list trains */
+ .id = GSO_TEST_FRAG_LIST_NON_UNIFORM,
+ .name = "frag_list_non_uniform",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frag_skbs = 4,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 },
+ .nr_segs = 4,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 },
+ },
+ {
+ /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and
+ * commit 90017accff61 ("sctp: Add GSO support")
+ *
+ * "there will be a cover skb with protocol headers and
+ * children ones containing the actual segments"
+ */
+ .id = GSO_TEST_GSO_BY_FRAGS,
+ .name = "gso_by_frags",
+ .nr_frag_skbs = 4,
+ .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 },
+ .nr_segs = 4,
+ .segs = (const unsigned int[]) { 100, 200, 300, 400 },
+ },
+};
+
+/* Copy the test case name into the KUnit parameter description buffer. */
+static void gso_test_case_to_desc(struct gso_test_case *t, char *desc)
+{
+	/* Bounded copy, as done by ip_tunnel_flags_test_case_to_desc(). */
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc);
+
+/*
+ * Build the skb described by the current test parameter, pass it to
+ * skb_segment() and check the result: the number of segments, the length
+ * of each one, the header offsets, and that every segment carries a copy
+ * of the test header.
+ */
+static void gso_test_func(struct kunit *test)
+{
+	const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct sk_buff *skb, *segs, *cur, *next, *last;
+	const struct gso_test_case *tcase;
+	netdev_features_t features;
+	struct page *page;
+	int i;
+
+	tcase = test->param_value;
+
+	/* head skb: test header plus linear payload, built on its own page */
+	page = alloc_page(GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, page);
+	skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size);
+	KUNIT_ASSERT_NOT_NULL(test, skb);
+	__skb_put(skb, sizeof(hdr) + tcase->linear_len);
+
+	__init_skb(skb);
+
+	if (tcase->nr_frags) {
+		unsigned int pg_off = 0;
+
+		/* all frags live back to back in one page, one ref per frag */
+		page = alloc_page(GFP_KERNEL);
+		KUNIT_ASSERT_NOT_NULL(test, page);
+		page_ref_add(page, tcase->nr_frags - 1);
+
+		for (i = 0; i < tcase->nr_frags; i++) {
+			skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]);
+			pg_off += tcase->frags[i];
+		}
+
+		KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE);
+
+		skb->data_len = pg_off;
+		skb->len += skb->data_len;
+		skb->truesize += skb->data_len;
+	}
+
+	if (tcase->frag_skbs) {
+		unsigned int total_size = 0, total_true_size = 0;
+		struct sk_buff *frag_skb, *prev = NULL;
+
+		/* chain one freshly built skb per entry onto the frag_list */
+		for (i = 0; i < tcase->nr_frag_skbs; i++) {
+			unsigned int frag_size;
+
+			page = alloc_page(GFP_KERNEL);
+			KUNIT_ASSERT_NOT_NULL(test, page);
+
+			frag_size = tcase->frag_skbs[i];
+			frag_skb = build_skb(page_address(page),
+					     frag_size + shinfo_size);
+			KUNIT_ASSERT_NOT_NULL(test, frag_skb);
+			__skb_put(frag_skb, frag_size);
+
+			if (prev)
+				prev->next = frag_skb;
+			else
+				skb_shinfo(skb)->frag_list = frag_skb;
+			prev = frag_skb;
+
+			total_size += frag_size;
+			total_true_size += frag_skb->truesize;
+		}
+
+		skb->len += total_size;
+		skb->data_len += total_size;
+		skb->truesize += total_true_size;
+
+		if (tcase->id == GSO_TEST_GSO_BY_FRAGS)
+			skb_shinfo(skb)->gso_size = GSO_BY_FRAGS;
+	}
+
+	features = NETIF_F_SG | NETIF_F_HW_CSUM;
+	if (tcase->id == GSO_TEST_GSO_PARTIAL)
+		features |= NETIF_F_GSO_PARTIAL;
+
+	/* TODO: this should also work with SG,
+	 * rather than hit BUG_ON(i >= nfrags)
+	 */
+	if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM)
+		features &= ~NETIF_F_SG;
+
+	segs = skb_segment(skb, features);
+	if (IS_ERR(segs)) {
+		KUNIT_FAIL(test, "segs error %pe", segs);
+		goto free_gso_skb;
+	} else if (!segs) {
+		KUNIT_FAIL(test, "no segments");
+		goto free_gso_skb;
+	}
+
+	last = segs->prev;
+	for (cur = segs, i = 0; cur; cur = next, i++) {
+		next = cur->next;
+
+		/* more segments than expected must not index past segs[] */
+		KUNIT_ASSERT_LT(test, i, tcase->nr_segs);
+		KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]);
+
+		/* segs have skb->data pointing to the mac header */
+		KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data);
+		KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr));
+
+		/* header was copied to all segs */
+		KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0);
+
+		/* last seg can be found through segs->prev pointer */
+		if (!next)
+			KUNIT_ASSERT_PTR_EQ(test, cur, last);
+
+		consume_skb(cur);
+	}
+
+	KUNIT_ASSERT_EQ(test, i, tcase->nr_segs);
+
+free_gso_skb:
+	consume_skb(skb);
+}
+
+/* IP tunnel flags */
+
+#include <net/ip_tunnels.h>
+
+/*
+ * One test vector for the IP tunnel flags bitmap <-> __be16 conversions,
+ * consumed by ip_tunnel_flags_test_run().
+ */
+struct ip_tunnel_flags_test {
+ const char *name;
+
+ /* bit numbers set in the source bitmap, and in the expected bitmap */
+ const u16 *src_bits;
+ const u16 *exp_bits;
+ /* number of entries in src_bits and exp_bits respectively */
+ u8 src_num;
+ u8 exp_num;
+
+ /* expected ip_tunnel_flags_to_be16() result for the source bitmap */
+ __be16 exp_val;
+ /* expected ip_tunnel_flags_is_be16_compat() result for the source */
+ bool exp_comp;
+};
+
+/*
+ * Initializer for one struct ip_tunnel_flags_test entry.  @src and @exp
+ * must be arrays rather than pointers, since their element counts are
+ * taken with ARRAY_SIZE().
+ */
+#define IP_TUNNEL_FLAGS_TEST(n, src, comp, eval, exp) { \
+ .name = (n), \
+ .src_bits = (src), \
+ .src_num = ARRAY_SIZE(src), \
+ .exp_comp = (comp), \
+ .exp_val = (eval), \
+ .exp_bits = (exp), \
+ .exp_num = ARRAY_SIZE(exp), \
+}
+
+/* These are __be16-compatible and can be compared as is */
+static const u16 ip_tunnel_flags_1[] = {
+ IP_TUNNEL_KEY_BIT,
+ IP_TUNNEL_STRICT_BIT,
+ IP_TUNNEL_ERSPAN_OPT_BIT,
+};
+
+/* Due to the previous flags design limitation, setting either
+ * ``IP_TUNNEL_CSUM_BIT`` (on Big Endian) or ``IP_TUNNEL_DONT_FRAGMENT_BIT``
+ * (on Little) also sets VTI/ISATAP bit. In the bitmap implementation, they
+ * correspond to ``BIT(16)``, which is bigger than ``U16_MAX``, but still is
+ * backward-compatible.
+ */
+#ifdef __LITTLE_ENDIAN
+#define IP_TUNNEL_CONFLICT_BIT IP_TUNNEL_DONT_FRAGMENT_BIT
+#else
+#define IP_TUNNEL_CONFLICT_BIT IP_TUNNEL_CSUM_BIT
+#endif
+
+static const u16 ip_tunnel_flags_2_src[] = {
+ IP_TUNNEL_CONFLICT_BIT,
+};
+
+static const u16 ip_tunnel_flags_2_exp[] = {
+ IP_TUNNEL_CONFLICT_BIT,
+ IP_TUNNEL_SIT_ISATAP_BIT,
+};
+
+/* Bits 17 and higher are not compatible with __be16 flags */
+static const u16 ip_tunnel_flags_3_src[] = {
+ IP_TUNNEL_VXLAN_OPT_BIT,
+ 17,
+ 18,
+ 20,
+};
+
+static const u16 ip_tunnel_flags_3_exp[] = {
+ IP_TUNNEL_VXLAN_OPT_BIT,
+};
+
+static const struct ip_tunnel_flags_test ip_tunnel_flags_test[] = {
+ IP_TUNNEL_FLAGS_TEST("compat", ip_tunnel_flags_1, true,
+ cpu_to_be16(BIT(IP_TUNNEL_KEY_BIT) |
+ BIT(IP_TUNNEL_STRICT_BIT) |
+ BIT(IP_TUNNEL_ERSPAN_OPT_BIT)),
+ ip_tunnel_flags_1),
+ IP_TUNNEL_FLAGS_TEST("conflict", ip_tunnel_flags_2_src, true,
+ VTI_ISVTI, ip_tunnel_flags_2_exp),
+ IP_TUNNEL_FLAGS_TEST("new", ip_tunnel_flags_3_src, false,
+ cpu_to_be16(BIT(IP_TUNNEL_VXLAN_OPT_BIT)),
+ ip_tunnel_flags_3_exp),
+};
+
+/*
+ * Copy the test vector name into the KUnit parameter description buffer,
+ * bounded by KUNIT_PARAM_DESC_SIZE.
+ */
+static void
+ip_tunnel_flags_test_case_to_desc(const struct ip_tunnel_flags_test *t,
+ char *desc)
+{
+ strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+KUNIT_ARRAY_PARAM(ip_tunnel_flags_test, ip_tunnel_flags_test,
+ ip_tunnel_flags_test_case_to_desc);
+
+/*
+ * Run one ip_tunnel_flags_test vector: build the source and expected
+ * bitmaps from the listed bit numbers, then check the compatibility
+ * predicate, the bitmap to __be16 conversion, and the conversion of the
+ * expected __be16 value back into a bitmap.
+ */
+static void ip_tunnel_flags_test_run(struct kunit *test)
+{
+ const struct ip_tunnel_flags_test *t = test->param_value;
+ IP_TUNNEL_DECLARE_FLAGS(src) = { };
+ IP_TUNNEL_DECLARE_FLAGS(exp) = { };
+ /* not initialised here; passed to ip_tunnel_flags_from_be16() below */
+ IP_TUNNEL_DECLARE_FLAGS(out);
+
+ for (u32 j = 0; j < t->src_num; j++)
+ __set_bit(t->src_bits[j], src);
+ for (u32 j = 0; j < t->exp_num; j++)
+ __set_bit(t->exp_bits[j], exp);
+
+ KUNIT_ASSERT_EQ(test, t->exp_comp,
+ ip_tunnel_flags_is_be16_compat(src));
+ KUNIT_ASSERT_EQ(test, (__force u16)t->exp_val,
+ (__force u16)ip_tunnel_flags_to_be16(src));
+
+ /* the reverse direction starts from the expected __be16 value */
+ ip_tunnel_flags_from_be16(out, t->exp_val);
+ KUNIT_ASSERT_TRUE(test, __ipt_flag_op(bitmap_equal, exp, out));
+}
+
+static struct kunit_case net_test_cases[] = {
+ KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params),
+ KUNIT_CASE_PARAM(ip_tunnel_flags_test_run,
+ ip_tunnel_flags_test_gen_params),
+ { },
+};
+
+static struct kunit_suite net_test_suite = {
+ .name = "net_core",
+ .test_cases = net_test_cases,
+};
+kunit_test_suite(net_test_suite);
+
+MODULE_DESCRIPTION("KUnit tests for networking core");
+MODULE_LICENSE("GPL");
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index be7f2ebd61..8350a0afa9 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -68,6 +68,12 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN
[NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
};
+/* NETDEV_CMD_QSTATS_GET - dump */
+static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = {
+ [NETDEV_A_QSTATS_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -138,6 +144,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_NAPI_IFINDEX,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NETDEV_CMD_QSTATS_GET,
+ .dumpit = netdev_nl_qstats_get_dumpit,
+ .policy = netdev_qstats_get_nl_policy,
+ .maxattr = NETDEV_A_QSTATS_SCOPE,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index a47f2bcbe4..4db40fd5b4 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -28,6 +28,8 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
enum {
NETDEV_NLGRP_MGMT,
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 918b109e0c..05f9515d2c 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -8,6 +8,7 @@
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/netdev_rx_queue.h>
+#include <net/netdev_queues.h>
#include <net/busy_poll.h>
#include "netdev-genl-gen.h"
@@ -58,22 +59,22 @@ XDP_METADATA_KFUNC_xxx
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
xdp_rx_meta, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
- xsk_features, NETDEV_A_DEV_PAD)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ xsk_features, NETDEV_A_DEV_PAD))
+ goto err_cancel_msg;
if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
- netdev->xdp_zc_max_segs)) {
- genlmsg_cancel(rsp, hdr);
- return -EINVAL;
- }
+ netdev->xdp_zc_max_segs))
+ goto err_cancel_msg;
}
genlmsg_end(rsp, hdr);
return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
}
static void
@@ -460,6 +461,266 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return err;
}
+#define NETDEV_STAT_NOT_SET (~0ULL)
+
+/* Accumulate one block of u64 counters into another, slot by slot.
+ *
+ * @_sum: destination counters, updated in place
+ * @_add: counters to add
+ * @size: size of each block in bytes
+ *
+ * A slot is accumulated only when both the running sum and the addend are
+ * set; a slot holding NETDEV_STAT_NOT_SET on either side leaves the sum
+ * untouched.
+ */
+static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
+{
+	size_t i, cnt = size / sizeof(u64);
+	const u64 *add = _add;
+	u64 *sum = _sum;
+
+	/* Walk whole u64 slots only, so a size that is not a multiple of
+	 * sizeof(u64) cannot wrap the counter and run past the buffers.
+	 */
+	for (i = 0; i < cnt; i++) {
+		if (add[i] != NETDEV_STAT_NOT_SET &&
+		    sum[i] != NETDEV_STAT_NOT_SET)
+			sum[i] += add[i];
+	}
+}
+
+/* Write @value as attribute @attr_id, unless it still holds the
+ * NETDEV_STAT_NOT_SET marker, in which case nothing is written and 0 is
+ * returned. Otherwise the nla_put_uint() result is returned.
+ */
+static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
+{
+	if (value != NETDEV_STAT_NOT_SET)
+		return nla_put_uint(rsp, attr_id, value);
+
+	return 0;
+}
+
+/* Put every RX counter of @rx that is set into @rsp, in attribute order.
+ * Stops at the first attribute that cannot be added.
+ *
+ * Returns 0 on success, -EMSGSIZE if any attribute could not be written.
+ */
+static int
+netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
+{
+	const struct {
+		unsigned int attr;
+		u64 val;
+	} stats[] = {
+		{ NETDEV_A_QSTATS_RX_PACKETS, rx->packets },
+		{ NETDEV_A_QSTATS_RX_BYTES, rx->bytes },
+		{ NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail },
+		{ NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops },
+		{ NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns },
+		{ NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary },
+		{ NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none },
+		{ NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad },
+		{ NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets },
+		{ NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes },
+		{ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets },
+		{ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes },
+		{ NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits },
+	};
+	size_t idx;
+
+	for (idx = 0; idx < sizeof(stats) / sizeof(stats[0]); idx++) {
+		if (netdev_stat_put(rsp, stats[idx].attr, stats[idx].val))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/* Put every TX counter of @tx that is set into @rsp, in attribute order.
+ * Stops at the first attribute that cannot be added.
+ *
+ * Returns 0 on success, -EMSGSIZE if any attribute could not be written.
+ */
+static int
+netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
+{
+	const struct {
+		unsigned int attr;
+		u64 val;
+	} stats[] = {
+		{ NETDEV_A_QSTATS_TX_PACKETS, tx->packets },
+		{ NETDEV_A_QSTATS_TX_BYTES, tx->bytes },
+		{ NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops },
+		{ NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors },
+		{ NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none },
+		{ NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum },
+		{ NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets },
+		{ NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes },
+		{ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets },
+		{ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes },
+		{ NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits },
+		{ NETDEV_A_QSTATS_TX_STOP, tx->stop },
+		{ NETDEV_A_QSTATS_TX_WAKE, tx->wake },
+	};
+	size_t idx;
+
+	for (idx = 0; idx < sizeof(stats) / sizeof(stats[0]); idx++) {
+		if (netdev_stat_put(rsp, stats[idx].attr, stats[idx].val))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/* Build one message carrying the statistics of queue @i of kind @q_type.
+ *
+ * The stats struct is pre-filled with 0xff bytes before the driver op is
+ * called; if it is still all-0xff afterwards the driver reported nothing,
+ * so the message is cancelled and 0 is returned.
+ *
+ * Returns 0 on success (or when there is nothing to report), -EMSGSIZE if
+ * the message does not fit in @rsp.
+ */
+static int
+netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
+		      u32 q_type, int i, const struct genl_info *info)
+{
+	const struct netdev_stat_ops *ops = netdev->stat_ops;
+	struct netdev_queue_stats_rx rx;
+	struct netdev_queue_stats_tx tx;
+	int ret = -EMSGSIZE;
+	void *hdr;
+
+	hdr = genlmsg_iput(rsp, info);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
+		goto cancel;
+	if (nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type))
+		goto cancel;
+	if (nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
+		goto cancel;
+
+	if (q_type == NETDEV_QUEUE_TYPE_RX) {
+		memset(&rx, 0xff, sizeof(rx));
+		ops->get_queue_stats_rx(netdev, i, &rx);
+		if (!memchr_inv(&rx, 0xff, sizeof(rx))) {
+			/* Nothing reported: drop the message, not an error */
+			ret = 0;
+			goto cancel;
+		}
+		if (netdev_nl_stats_write_rx(rsp, &rx))
+			goto cancel;
+	} else if (q_type == NETDEV_QUEUE_TYPE_TX) {
+		memset(&tx, 0xff, sizeof(tx));
+		ops->get_queue_stats_tx(netdev, i, &tx);
+		if (!memchr_inv(&tx, 0xff, sizeof(tx))) {
+			/* Nothing reported: drop the message, not an error */
+			ret = 0;
+			goto cancel;
+		}
+		if (netdev_nl_stats_write_tx(rsp, &tx))
+			goto cancel;
+	}
+
+	genlmsg_end(rsp, hdr);
+	return 0;
+
+cancel:
+	genlmsg_cancel(rsp, hdr);
+	return ret;
+}
+
+/* Dump per-queue statistics of @netdev, one netlink message per queue.
+ *
+ * RX queues are dumped first, then TX queues, each kind only when the
+ * driver provides the matching get_queue_stats_*() op. Devices that are
+ * not IFF_UP are skipped. ctx->rxq_idx and ctx->txq_idx hold the index of
+ * the NEXT queue to dump, so a dump interrupted by an error resumes right
+ * after the last queue that was written. Both are reset to 0 once the
+ * device has been dumped completely.
+ *
+ * Returns 0 on success or the error from netdev_nl_stats_queue().
+ */
+static int
+netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
+			 const struct genl_info *info,
+			 struct netdev_nl_dump_ctx *ctx)
+{
+	const struct netdev_stat_ops *ops = netdev->stat_ops;
+	int i, err;
+
+	if (!(netdev->flags & IFF_UP))
+		return 0;
+
+	i = ctx->rxq_idx;
+	while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
+		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
+					    i, info);
+		if (err)
+			return err;
+		/* Record the next queue, not the one just dumped; otherwise
+		 * a resumed dump would emit this queue a second time.
+		 */
+		ctx->rxq_idx = ++i;
+	}
+	i = ctx->txq_idx;
+	while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
+		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
+					    i, info);
+		if (err)
+			return err;
+		/* Same as for RX: store the resume point past this queue */
+		ctx->txq_idx = ++i;
+	}
+
+	ctx->rxq_idx = 0;
+	ctx->txq_idx = 0;
+	return 0;
+}
+
+/* Emit one message with the device-wide totals for @netdev.
+ *
+ * The totals start from what ops->get_base_stats() reports and then have
+ * every RX and TX queue's counters added via netdev_nl_stats_add().
+ *
+ * Returns 0 on success, and also when the driver has no get_base_stats op
+ * or that op reported nothing (no message is written in those cases);
+ * -EMSGSIZE if the message does not fit in @rsp.
+ */
+static int
+netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info)
+{
+ struct netdev_queue_stats_rx rx_sum, rx;
+ struct netdev_queue_stats_tx tx_sum, tx;
+ const struct netdev_stat_ops *ops;
+ void *hdr;
+ int i;
+
+ ops = netdev->stat_ops;
+ /* Netdev can't guarantee any complete counters */
+ if (!ops->get_base_stats)
+ return 0;
+
+ /* All-0xff bytes make every u64 slot read as NETDEV_STAT_NOT_SET */
+ memset(&rx_sum, 0xff, sizeof(rx_sum));
+ memset(&tx_sum, 0xff, sizeof(tx_sum));
+
+ ops->get_base_stats(netdev, &rx_sum, &tx_sum);
+
+ /* The op was there, but nothing reported, don't bother */
+ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
+ !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+ if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
+ goto nla_put_failure;
+
+ /* Without a per-queue op, rx stays all "not set" and adds nothing */
+ for (i = 0; i < netdev->real_num_rx_queues; i++) {
+ memset(&rx, 0xff, sizeof(rx));
+ if (ops->get_queue_stats_rx)
+ ops->get_queue_stats_rx(netdev, i, &rx);
+ netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx));
+ }
+ /* Same accumulation for the TX side */
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ memset(&tx, 0xff, sizeof(tx));
+ if (ops->get_queue_stats_tx)
+ ops->get_queue_stats_tx(netdev, i, &tx);
+ netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx));
+ }
+
+ if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
+ netdev_nl_stats_write_tx(rsp, &tx_sum))
+ goto nla_put_failure;
+
+ genlmsg_end(rsp, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+/* Dump the statistics of a single device at the requested @scope.
+ *
+ * Devices without stat_ops are silently skipped (returns 0). A @scope of 0
+ * selects the per-device totals, NETDEV_QSTATS_SCOPE_QUEUE the per-queue
+ * dump; anything else yields -EINVAL.
+ */
+static int
+netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
+			      struct sk_buff *skb, const struct genl_info *info,
+			      struct netdev_nl_dump_ctx *ctx)
+{
+	if (!netdev->stat_ops)
+		return 0;
+
+	if (scope == 0)
+		return netdev_nl_stats_by_netdev(netdev, skb, info);
+
+	if (scope == NETDEV_QSTATS_SCOPE_QUEUE)
+		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
+
+	return -EINVAL; /* Should not happen, per netlink policy */
+}
+
+/* Dump handler for NETDEV_CMD_QSTATS_GET.
+ *
+ * Reads the optional NETDEV_A_QSTATS_SCOPE and NETDEV_A_QSTATS_IFINDEX
+ * request attributes (both default to 0). With an ifindex, only that device
+ * is dumped: -ENODEV if it does not exist, -EOPNOTSUPP if it has no
+ * stat_ops, with the ifindex attribute flagged in extack either way.
+ * Without an ifindex, every device in the netns is walked, resuming from
+ * ctx->ifindex. The whole operation runs under rtnl_lock().
+ *
+ * Returns 0 or a negative error from the per-device dump.
+ */
+int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ unsigned int ifindex;
+ unsigned int scope;
+ int err = 0;
+
+ scope = 0;
+ if (info->attrs[NETDEV_A_QSTATS_SCOPE])
+ scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
+
+ ifindex = 0;
+ if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
+
+ rtnl_lock();
+ if (ifindex) {
+ /* Single-device request: lookup failures are reported to the caller */
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev && netdev->stat_ops) {
+ err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+ info, ctx);
+ } else {
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[NETDEV_A_QSTATS_IFINDEX]);
+ err = netdev ? -EOPNOTSUPP : -ENODEV;
+ }
+ } else {
+ /* Full dump: stop at the first device whose dump fails */
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+ info, ctx);
+ if (err < 0)
+ break;
+ }
+ }
+ rtnl_unlock();
+
+ return err;
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 543007f159..55bcacf67d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev)
struct napi_struct *napi;
list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner == smp_processor_id())
+ if (READ_ONCE(napi->poll_owner) == smp_processor_id())
return 1;
}
return 0;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a..3772eb63dc 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -5,6 +5,7 @@
* Copyright (C) 2016 Red Hat, Inc.
*/
+#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -31,6 +32,8 @@
#define BIAS_MAX (LONG_MAX >> 1)
#ifdef CONFIG_PAGE_POOL_STATS
+static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);
+
/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
@@ -121,9 +124,9 @@ int page_pool_ethtool_stats_get_count(void)
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
{
- struct page_pool_stats *pool_stats = stats;
+ const struct page_pool_stats *pool_stats = stats;
*data++ = pool_stats->alloc_stats.fast;
*data++ = pool_stats->alloc_stats.slow;
@@ -170,16 +173,29 @@ static void page_pool_producer_unlock(struct page_pool *pool,
spin_unlock_bh(&pool->ring.producer_lock);
}
+/* Layout assertions for the "frag" cacheline group of struct page_pool:
+ * frag_users, frag_page and frag_offset are asserted to be members of the
+ * group, and the group size is checked against 4 * sizeof(long).
+ * NOTE(review): presumably these macros expand to build-time checks with no
+ * runtime cost - confirm against their definitions.
+ */
+static void page_pool_struct_check(void)
+{
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
+ CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
+}
+
static int page_pool_init(struct page_pool *pool,
- const struct page_pool_params *params)
+ const struct page_pool_params *params,
+ int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ page_pool_struct_check();
+
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
+ pool->cpuid = cpuid;
+
/* Validate only known flags were used */
- if (pool->p.flags & ~(PP_FLAG_ALL))
+ if (pool->slow.flags & ~PP_FLAG_ALL)
return -EINVAL;
if (pool->p.pool_size)
@@ -193,22 +209,26 @@ static int page_pool_init(struct page_pool *pool,
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
* which is the XDP_TX use-case.
*/
- if (pool->p.flags & PP_FLAG_DMA_MAP) {
+ if (pool->slow.flags & PP_FLAG_DMA_MAP) {
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
+
+ pool->dma_map = true;
}
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+ if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
/* In order to request DMA-sync-for-device the page
* needs to be mapped
*/
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
return -EINVAL;
if (!pool->p.max_len)
return -EINVAL;
+ pool->dma_sync = true;
+
/* pool->p.offset has to be set according to the address
* offset used by the DMA engine to start copying rx data
*/
@@ -217,14 +237,24 @@ static int page_pool_init(struct page_pool *pool,
pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
- pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
- if (!pool->recycle_stats)
- return -ENOMEM;
+ if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
+ pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+ if (!pool->recycle_stats)
+ return -ENOMEM;
+ } else {
+ /* For system page pool instance we use a singular stats object
+ * instead of allocating a separate percpu variable for each
+ * (also percpu) page pool instance.
+ */
+ pool->recycle_stats = &pp_system_recycle_stats;
+ pool->system = true;
+ }
#endif
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
+ if (!pool->system)
+ free_percpu(pool->recycle_stats);
#endif
return -ENOMEM;
}
@@ -234,7 +264,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
get_device(pool->p.dev);
return 0;
@@ -244,19 +274,22 @@ static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
put_device(pool->p.dev);
#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
+ if (!pool->system)
+ free_percpu(pool->recycle_stats);
#endif
}
/**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
* @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
*/
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
{
struct page_pool *pool;
int err;
@@ -265,7 +298,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
if (!pool)
return ERR_PTR(-ENOMEM);
- err = page_pool_init(pool, params);
+ err = page_pool_init(pool, params, cpuid);
if (err < 0)
goto err_free;
@@ -282,6 +315,16 @@ err_free:
kfree(pool);
return ERR_PTR(err);
}
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ *
+ * Thin wrapper that calls page_pool_create_percpu() with a cpuid of -1.
+ *
+ * Return: whatever page_pool_create_percpu() returns, which on failure is
+ * an ERR_PTR()-encoded error.
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+ return page_pool_create_percpu(params, -1);
+}
EXPORT_SYMBOL(page_pool_create);
static void page_pool_return_page(struct page_pool *pool, struct page *page);
@@ -356,16 +399,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
return page;
}
-static void page_pool_dma_sync_for_device(struct page_pool *pool,
- struct page *page,
- unsigned int dma_sync_size)
+static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
+ const struct page *page,
+ u32 dma_sync_size)
{
+#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
dma_addr_t dma_addr = page_pool_get_dma_addr(page);
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, dma_addr,
- pool->p.offset, dma_sync_size,
- pool->p.dma_dir);
+ __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
+ dma_sync_size, pool->p.dma_dir);
+#endif
+}
+
+/* Sync @page for the device, but only when the pool has dma_sync set and
+ * dma_dev_need_sync() says the pool's device needs it; otherwise this is a
+ * no-op. The actual sync is done by __page_pool_dma_sync_for_device().
+ */
+static __always_inline void
+page_pool_dma_sync_for_device(const struct page_pool *pool,
+ const struct page *page,
+ u32 dma_sync_size)
+{
+ if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
+ __page_pool_dma_sync_for_device(pool, page, dma_sync_size);
}
static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
@@ -387,13 +440,12 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (page_pool_set_dma_addr(page, dma))
goto unmap_failed;
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+ page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
return true;
unmap_failed:
- WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
+ WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
@@ -433,8 +485,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
return NULL;
}
@@ -454,8 +505,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
- unsigned int pp_flags = pool->p.flags;
unsigned int pp_order = pool->p.order;
+ bool dma_map = pool->dma_map;
struct page *page;
int i, nr_pages;
@@ -480,8 +531,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
page = pool->alloc.cache[i];
- if ((pp_flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
+ if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
continue;
}
@@ -523,6 +573,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
+ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
* https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
@@ -553,7 +604,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!pool->dma_map)
/* Always account for inflight pages, even if we didn't
* map them
*/
@@ -630,8 +681,13 @@ static bool page_pool_recycle_in_cache(struct page *page,
return true;
}
+/* A page may be recycled only when its reference count is exactly 1 and it
+ * is not a pfmemalloc page.
+ */
+static bool __page_pool_page_can_be_recycled(const struct page *page)
+{
+	if (page_ref_count(page) != 1)
+		return false;
+
+	return !page_is_pfmemalloc(page);
+}
+
/* If the page refcnt == 1, this will try to recycle the page.
- * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
+ * If pool->dma_sync is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
* If the page refcnt != 1, then the page will be returned to memory
* subsystem.
@@ -651,15 +707,12 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* page is NOT reusable when allocated when system is under
* some pressure. (page_is_pfmemalloc)
*/
- if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
+ if (likely(__page_pool_page_can_be_recycled(page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page,
- dma_sync_size);
+ page_pool_dma_sync_for_device(pool, page, dma_sync_size);
- if (allow_direct && in_softirq() &&
- page_pool_recycle_in_cache(page, pool))
+ if (allow_direct && page_pool_recycle_in_cache(page, pool))
return NULL;
/* Page found as candidate for recycling */
@@ -684,9 +737,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
return NULL;
}
+/* Tell whether direct recycling into @pool's cache is allowed from the
+ * current context: we must be in softirq, and either the pool is bound to
+ * this CPU or the pool's NAPI instance is owned by this CPU.
+ */
+static bool page_pool_napi_local(const struct page_pool *pool)
+{
+	const struct napi_struct *napi;
+	u32 this_cpu;
+
+	if (unlikely(!in_softirq()))
+		return false;
+
+	/* Direct recycling is fine when we have reasons to believe that we
+	 * run in the same context as the consumer would, so no race is
+	 * possible.
+	 * __page_pool_put_page() makes sure we're not in hardirq context
+	 * and interrupts are enabled prior to accessing the cache.
+	 */
+	this_cpu = smp_processor_id();
+	if (READ_ONCE(pool->cpuid) == this_cpu)
+		return true;
+
+	napi = READ_ONCE(pool->p.napi);
+	if (!napi)
+		return false;
+
+	return READ_ONCE(napi->list_owner) == this_cpu;
+}
+
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ if (!allow_direct)
+ allow_direct = page_pool_napi_local(pool);
+
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
/* Cache full, fallback to free pages */
@@ -715,8 +794,11 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
+ bool allow_direct;
bool in_softirq;
+ allow_direct = page_pool_napi_local(pool);
+
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
@@ -724,13 +806,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
if (!page_pool_is_last_ref(page))
continue;
- page = __page_pool_put_page(pool, page, -1, false);
+ page = __page_pool_put_page(pool, page, -1, allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
if (page)
data[bulk_len++] = page;
}
- if (unlikely(!bulk_len))
+ if (!bulk_len)
return;
/* Bulk producer into ptr_ring page_pool cache */
@@ -766,10 +848,8 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
if (likely(page_pool_unref_page(page, drain_count)))
return NULL;
- if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, -1);
-
+ if (__page_pool_page_can_be_recycled(page)) {
+ page_pool_dma_sync_for_device(pool, page, -1);
return page;
}
@@ -927,15 +1007,20 @@ static void page_pool_release_retry(struct work_struct *wq)
}
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
- struct xdp_mem_info *mem)
+ const struct xdp_mem_info *mem)
{
refcount_inc(&pool->user_cnt);
pool->disconnect = disconnect;
pool->xdp_mem_id = mem->id;
}
-void page_pool_unlink_napi(struct page_pool *pool)
+static void page_pool_disable_direct_recycling(struct page_pool *pool)
{
+ /* Disable direct recycling based on pool->cpuid.
+ * Paired with READ_ONCE() in page_pool_napi_local().
+ */
+ WRITE_ONCE(pool->cpuid, -1);
+
if (!pool->p.napi)
return;
@@ -947,7 +1032,6 @@ void page_pool_unlink_napi(struct page_pool *pool)
WRITE_ONCE(pool->p.napi, NULL);
}
-EXPORT_SYMBOL(page_pool_unlink_napi);
void page_pool_destroy(struct page_pool *pool)
{
@@ -957,7 +1041,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
- page_pool_unlink_napi(pool);
+ page_pool_disable_direct_recycling(pool);
page_pool_free_frag(pool);
if (!page_pool_release(pool))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bd50e9fe32..5e589f0a62 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -483,24 +483,15 @@ EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
*/
static void rtnl_lock_unregistering_all(void)
{
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
- unregistering = false;
rtnl_lock();
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
- for_each_net(net) {
- if (atomic_read(&net->dev_unreg_count) > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
+ if (!atomic_read(&dev_unreg_count))
break;
__rtnl_unlock();
@@ -851,9 +842,22 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+/* Set dev->operstate to @newstate with a cmpxchg retry loop, and call
+ * netdev_state_change() only when the stored value actually changed.
+ * Returns early, without notifying, if the state already equals @newstate.
+ */
+void netdev_set_operstate(struct net_device *dev, int newstate)
+{
+	unsigned int cur = READ_ONCE(dev->operstate);
+
+	for (;;) {
+		if (cur == newstate)
+			return;
+		/* On failure try_cmpxchg() refreshes cur; re-check and retry */
+		if (try_cmpxchg(&dev->operstate, &cur, newstate))
+			break;
+	}
+
+	netdev_state_change(dev);
+}
+EXPORT_SYMBOL(netdev_set_operstate);
+
static void set_operstate(struct net_device *dev, unsigned char transition)
{
- unsigned char operstate = dev->operstate;
+ unsigned char operstate = READ_ONCE(dev->operstate);
switch (transition) {
case IF_OPER_UP:
@@ -875,12 +879,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
}
- if (dev->operstate != operstate) {
- write_lock(&dev_base_lock);
- dev->operstate = operstate;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- }
+ netdev_set_operstate(dev, operstate);
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
@@ -1037,8 +1036,8 @@ static size_t rtnl_proto_down_size(const struct net_device *dev)
{
size_t size = nla_total_size(1);
- if (dev->proto_down_reason)
- size += nla_total_size(0) + nla_total_size(4);
+ /* Assume dev->proto_down_reason is not zero. */
+ size += nla_total_size(0) + nla_total_size(4);
return size;
}
@@ -1456,17 +1455,18 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return 0;
}
-static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_fill_link_ifmap(struct sk_buff *skb,
+ const struct net_device *dev)
{
struct rtnl_link_ifmap map;
memset(&map, 0, sizeof(map));
- map.mem_start = dev->mem_start;
- map.mem_end = dev->mem_end;
- map.base_addr = dev->base_addr;
- map.irq = dev->irq;
- map.dma = dev->dma;
- map.port = dev->if_port;
+ map.mem_start = READ_ONCE(dev->mem_start);
+ map.mem_end = READ_ONCE(dev->mem_end);
+ map.base_addr = READ_ONCE(dev->base_addr);
+ map.irq = READ_ONCE(dev->irq);
+ map.dma = READ_ONCE(dev->dma);
+ map.port = READ_ONCE(dev->if_port);
if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
static u32 rtnl_xdp_prog_skb(struct net_device *dev)
{
const struct bpf_prog *generic_xdp_prog;
+ u32 res = 0;
- ASSERT_RTNL();
+ rcu_read_lock();
+ generic_xdp_prog = rcu_dereference(dev->xdp_prog);
+ if (generic_xdp_prog)
+ res = generic_xdp_prog->aux->id;
+ rcu_read_unlock();
- generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
- if (!generic_xdp_prog)
- return 0;
- return generic_xdp_prog->aux->id;
+ return res;
}
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev)
- ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+ ret = nla_put_u32(skb, IFLA_MASTER,
+ READ_ONCE(upper_dev->ifindex));
rcu_read_unlock();
return ret;
@@ -1612,10 +1615,10 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
bool force)
{
- int ifindex = dev_get_iflink(dev);
+ int iflink = dev_get_iflink(dev);
- if (force || dev->ifindex != ifindex)
- return nla_put_u32(skb, IFLA_LINK, ifindex);
+ if (force || READ_ONCE(dev->ifindex) != iflink)
+ return nla_put_u32(skb, IFLA_LINK, iflink);
return 0;
}
@@ -1699,7 +1702,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
struct netdev_name_node *name_node;
int count = 0;
- list_for_each_entry(name_node, &dev->name_node->list, list) {
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list) {
if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
return -EMSGSIZE;
count++;
@@ -1707,6 +1710,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
return count;
}
+/* RCU protected. */
static int rtnl_fill_prop_list(struct sk_buff *skb,
const struct net_device *dev)
{
@@ -1735,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb,
struct nlattr *pr;
u32 preason;
- if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down)))
goto nla_put_failure;
- preason = dev->proto_down_reason;
+ preason = READ_ONCE(dev->proto_down_reason);
if (!preason)
return 0;
@@ -1811,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
u32 event, int *new_nsid, int new_ifindex,
int tgt_netnsid, gfp_t gfp)
{
+ char devname[IFNAMSIZ];
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
struct Qdisc *qdisc;
@@ -1823,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0;
- ifm->ifi_type = dev->type;
- ifm->ifi_index = dev->ifindex;
+ ifm->ifi_type = READ_ONCE(dev->type);
+ ifm->ifi_index = READ_ONCE(dev->ifindex);
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
- qdisc = rtnl_dereference(dev->qdisc);
- if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
- nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
+ netdev_copy_name(dev, devname);
+ if (nla_put_string(skb, IFLA_IFNAME, devname))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
- nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
- nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
- nla_put_u32(skb, IFLA_GROUP, dev->group) ||
- nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
- nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) ||
- nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
- nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
- nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
+ netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN) ||
+ nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
+ nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
+ nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
+ nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
+ nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
+ nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
+ READ_ONCE(dev->num_tx_queues)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
+ READ_ONCE(dev->gso_max_segs)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
+ READ_ONCE(dev->gso_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
+ READ_ONCE(dev->gro_max_size)) ||
+ nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gso_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gro_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
+ READ_ONCE(dev->tso_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
+ READ_ONCE(dev->tso_max_segs)) ||
#ifdef CONFIG_RPS
- nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+ nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
+ READ_ONCE(dev->num_rx_queues)) ||
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (qdisc &&
- nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -1876,9 +1891,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_ifmap(skb, dev))
- goto nla_put_failure;
-
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
@@ -1911,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
- goto nla_put_failure;
-
if (new_nsid &&
nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure;
@@ -1926,12 +1935,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
+ if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
+ goto nla_put_failure_rcu;
+ qdisc = rcu_dereference(dev->qdisc);
+ if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
+ goto nla_put_failure_rcu;
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
- rcu_read_unlock();
-
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure_rcu;
if (rtnl_fill_prop_list(skb, dev))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
if (dev->dev.parent &&
nla_put_string(skb, IFLA_PARENT_DEV_NAME,
@@ -2200,25 +2215,22 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
+ unsigned int flags = NLM_F_MULTI;
struct nlattr *tb[IFLA_MAX+1];
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
+ struct net *tgt_net = net;
u32 ext_filter_mask = 0;
- const struct rtnl_link_ops *kind_ops = NULL;
- unsigned int flags = NLM_F_MULTI;
+ struct net_device *dev;
int master_idx = 0;
int netnsid = -1;
int err, i;
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
if (err < 0) {
if (cb->strict_check)
@@ -2262,36 +2274,18 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
flags |= NLM_F_DUMP_FILTERED;
walk_entries:
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (link_dump_filtered(dev, master_idx, kind_ops))
- goto cont;
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_ifinfo(skb, dev, net,
- RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq, 0, flags,
- ext_filter_mask, 0, NULL, 0,
- netnsid, GFP_KERNEL);
-
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
-cont:
- idx++;
- }
+ err = 0;
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ if (link_dump_filtered(dev, master_idx, kind_ops))
+ continue;
+ err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq, 0, flags,
+ ext_filter_mask, 0, NULL, 0,
+ netnsid, GFP_KERNEL);
+ if (err < 0)
+ break;
}
-out:
- err = skb->len;
-out_err:
- cb->args[1] = idx;
- cb->args[0] = h;
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
@@ -2552,7 +2546,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
- nla_len(attr) < NLA_HDRLEN) {
+ nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
return -EINVAL;
}
if (len >= MAX_VLAN_LIST_LEN)
@@ -2983,11 +2977,9 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_LINKMODE]) {
unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
- write_lock(&dev_base_lock);
if (dev->link_mode ^ value)
status |= DO_SETLINK_NOTIFY;
- dev->link_mode = value;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->link_mode, value);
}
if (tb[IFLA_VFINFO_LIST]) {
@@ -3296,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb);
+ dev = rtnl_dev_get(tgt_net, tb);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
@@ -5269,15 +5261,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
- nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
- if (nla_len(attr) < sizeof(flags))
- return -EINVAL;
+ nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec,
+ rem) {
+ if (nla_len(attr) < sizeof(flags))
+ return -EINVAL;
- have_flags = true;
- flags = nla_get_u16(attr);
- break;
- }
+ have_flags = true;
+ flags = nla_get_u16(attr);
+ break;
}
}
@@ -5986,19 +5977,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
- int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
- struct hlist_head *head;
+ struct {
+ unsigned long ifindex;
+ int idxattr;
+ int prividx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
- int idx = 0;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
- s_idxattr = cb->args[2];
- s_prividx = cb->args[3];
+ int err;
cb->seq = net->dev_base_seq;
@@ -6017,39 +6006,26 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- flags, &filters,
- &s_idxattr, &s_prividx,
- extack);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ flags, &filters,
+ &ctx->idxattr, &ctx->prividx,
+ extack);
+ /* If we ran out of room on the first message,
+ * we're in trouble.
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
- s_prividx = 0;
- s_idxattr = 0;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
+ if (err < 0)
+ break;
+ ctx->prividx = 0;
+ ctx->idxattr = 0;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
-out:
- cb->args[3] = s_prividx;
- cb->args[2] = s_idxattr;
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void rtnl_offload_xstats_notify(struct net_device *dev)
@@ -6508,6 +6484,46 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Process one rtnetlink message. */
+static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ rtnl_dumpit_func dumpit = cb->data;
+ int err;
+
+ /* The previous iteration has already finished; avoid calling ->dumpit()
+ * again, as it may not expect to be called after it reached the end.
+ */
+ if (!dumpit)
+ return 0;
+
+ err = dumpit(skb, cb);
+
+ /* Old dump handlers used to send NLM_DONE in a separate recvmsg().
+ * Some applications which parse netlink manually depend on this.
+ */
+ if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+ if (err < 0 && err != -EMSGSIZE)
+ return err;
+ if (!err)
+ cb->data = NULL;
+
+ return skb->len;
+ }
+ return err;
+}
+
+static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *control)
+{
+ if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+ WARN_ON(control->data);
+ control->data = control->dump;
+ control->dump = rtnl_dumpit;
+ }
+
+ return netlink_dump_start(ssk, skb, nlh, control);
+}
+
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -6552,6 +6568,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
owner = link->owner;
dumpit = link->dumpit;
+ flags = link->flags;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
@@ -6569,8 +6586,9 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
.module = owner,
+ .flags = flags,
};
- err = netlink_dump_start(rtnl, skb, nlh, &c);
+ err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
* module if dump is still in progress.
*/
@@ -6716,7 +6734,7 @@ void __init rtnetlink_init(void)
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, 0);
+ rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
diff --git a/net/core/scm.c b/net/core/scm.c
index d0e0852a24..4f6a14babe 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,8 +86,15 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+ fpl->inflight = false;
+ fpl->dead = false;
+ fpl->edges = NULL;
+ INIT_LIST_HEAD(&fpl->vertices);
+#endif
}
fpp = &fpl->fp[fpl->count];
@@ -109,6 +117,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+ if (unix_get_socket(file))
+ fpl->count_unix++;
+
*fpp++ = file;
fpl->count++;
}
@@ -371,8 +382,14 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
+
new_fpl->max = new_fpl->count;
new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+ new_fpl->inflight = false;
+ new_fpl->edges = NULL;
+ INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 71dee435d5..466999a751 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -51,6 +51,7 @@
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
@@ -69,6 +70,7 @@
#include <net/sock.h>
#include <net/checksum.h>
#include <net/gso.h>
+#include <net/hotdata.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
@@ -88,15 +90,10 @@
#include "dev.h"
#include "sock_destructor.h"
-struct kmem_cache *skbuff_cache __ro_after_init;
-static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-
-static struct kmem_cache *skb_small_head_cache __ro_after_init;
-
#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
@@ -112,8 +109,23 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
#define SKB_SMALL_HEAD_HEADROOM \
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
-EXPORT_SYMBOL(sysctl_max_skb_frags);
+/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
+ * iov_iter_bvec(). These static asserts ensure the cast is valid as long as the
+ * netmem is a page.
+ */
+static_assert(offsetof(struct bio_vec, bv_page) ==
+ offsetof(skb_frag_t, netmem));
+static_assert(sizeof_field(struct bio_vec, bv_page) ==
+ sizeof_field(skb_frag_t, netmem));
+
+static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len));
+static_assert(sizeof_field(struct bio_vec, bv_len) ==
+ sizeof_field(skb_frag_t, len));
+
+static_assert(offsetof(struct bio_vec, bv_offset) ==
+ offsetof(skb_frag_t, offset));
+static_assert(sizeof_field(struct bio_vec, bv_offset) ==
+ sizeof_field(skb_frag_t, offset));
#undef FN
#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
@@ -297,7 +309,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
fragsz = SKB_DATA_ALIGN(fragsz);
- return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
+ align_mask);
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
@@ -309,13 +322,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
if (in_hardirq() || irqs_disabled()) {
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
- data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
+ data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+ align_mask);
} else {
struct napi_alloc_cache *nc;
local_bh_disable();
nc = this_cpu_ptr(&napi_alloc_cache);
- data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+ data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
+ align_mask);
local_bh_enable();
}
return data;
@@ -328,7 +343,7 @@ static struct sk_buff *napi_skb_cache_get(void)
struct sk_buff *skb;
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache,
+ nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
@@ -337,7 +352,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache));
+ kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache));
return skb;
}
@@ -395,7 +410,7 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -446,7 +461,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -557,7 +572,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj_size = SKB_HEAD_ALIGN(*size);
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
- obj = kmem_cache_alloc_node(skb_small_head_cache,
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
*size = SKB_SMALL_HEAD_CACHE_SIZE;
@@ -565,7 +580,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
goto out;
/* Try again but now we are using pfmemalloc reserves */
ret_pfmemalloc = true;
- obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
goto out;
}
@@ -628,7 +643,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_cache;
+ ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
@@ -758,10 +773,9 @@ skb_fail:
EXPORT_SYMBOL(__netdev_alloc_skb);
/**
- * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
* @napi: napi instance this buffer was allocated for
* @len: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
*
* Allocate a new sk_buff for use in NAPI receive. This buffer will
* attempt to allocate the head from a special reserved region used
@@ -770,9 +784,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
- gfp_t gfp_mask)
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
{
+ gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN;
struct napi_alloc_cache *nc;
struct sk_buff *skb;
bool pfmemalloc;
@@ -843,19 +857,19 @@ skb_success:
skb_fail:
return skb;
}
-EXPORT_SYMBOL(__napi_alloc_skb);
+EXPORT_SYMBOL(napi_alloc_skb);
-void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
- int size, unsigned int truesize)
+void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
+ int off, int size, unsigned int truesize)
{
DEBUG_NET_WARN_ON_ONCE(size > truesize);
- skb_fill_page_desc(skb, i, page, off, size);
+ skb_fill_netmem_desc(skb, i, netmem, off, size);
skb->len += size;
skb->data_len += size;
skb->truesize += truesize;
}
-EXPORT_SYMBOL(skb_add_rx_frag);
+EXPORT_SYMBOL(skb_add_rx_frag_netmem);
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize)
@@ -895,12 +909,101 @@ static bool is_pp_page(struct page *page)
return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}
+int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom)
+{
#if IS_ENABLED(CONFIG_PAGE_POOL)
-bool napi_pp_put_page(struct page *page, bool napi_safe)
+ u32 size, truesize, len, max_head_size, off;
+ struct sk_buff *skb = *pskb, *nskb;
+ int err, i, head_off;
+ void *data;
+
+ /* XDP does not support fraglist so we need to linearize
+ * the skb.
+ */
+ if (skb_has_frag_list(skb))
+ return -EOPNOTSUPP;
+
+ max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom);
+ if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
+ return -ENOMEM;
+
+ size = min_t(u32, skb->len, max_head_size);
+ truesize = SKB_HEAD_ALIGN(size) + headroom;
+ data = page_pool_dev_alloc_va(pool, &truesize);
+ if (!data)
+ return -ENOMEM;
+
+ nskb = napi_build_skb(data, truesize);
+ if (!nskb) {
+ page_pool_free_va(pool, data, true);
+ return -ENOMEM;
+ }
+
+ skb_reserve(nskb, headroom);
+ skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
+
+ err = skb_copy_bits(skb, 0, nskb->data, size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+ skb_put(nskb, size);
+
+ head_off = skb_headroom(nskb) - skb_headroom(skb);
+ skb_headers_offset_update(nskb, head_off);
+
+ off = size;
+ len = skb->len - off;
+ for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+ struct page *page;
+ u32 page_off;
+
+ size = min_t(u32, len, PAGE_SIZE);
+ truesize = size;
+
+ page = page_pool_dev_alloc(pool, &page_off, &truesize);
+ if (!page) {
+ consume_skb(nskb);
+ return -ENOMEM;
+ }
+
+ skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+ err = skb_copy_bits(skb, off, page_address(page) + page_off,
+ size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+
+ len -= size;
+ off += size;
+ }
+
+ consume_skb(skb);
+ *pskb = nskb;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+EXPORT_SYMBOL(skb_pp_cow_data);
+
+int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+ struct bpf_prog *prog)
{
- bool allow_direct = false;
- struct page_pool *pp;
+ if (!prog->aux->xdp_has_frags)
+ return -EINVAL;
+
+ return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM);
+}
+EXPORT_SYMBOL(skb_cow_data_for_xdp);
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_put_page(struct page *page)
+{
page = compound_head(page);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
@@ -913,38 +1016,18 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
if (unlikely(!is_pp_page(page)))
return false;
- pp = page->pp;
-
- /* Allow direct recycle if we have reasons to believe that we are
- * in the same context as the consumer would run, so there's
- * no possible race.
- * __page_pool_put_page() makes sure we're not in hardirq context
- * and interrupts are enabled prior to accessing the cache.
- */
- if (napi_safe || in_softirq()) {
- const struct napi_struct *napi = READ_ONCE(pp->p.napi);
-
- allow_direct = napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
- }
-
- /* Driver set this to memory recycling info. Reset it on recycle.
- * This will *not* work for NIC using a split-page memory model.
- * The page will be returned to the pool here regardless of the
- * 'flipped' fragment being in use or not.
- */
- page_pool_put_full_page(pp, page, allow_direct);
+ page_pool_put_full_page(page->pp, page, false);
return true;
}
EXPORT_SYMBOL(napi_pp_put_page);
#endif
-static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return napi_pp_put_page(virt_to_page(data), napi_safe);
+ return napi_pp_put_page(virt_to_page(data));
}
/**
@@ -981,17 +1064,17 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
- kmem_cache_free(skb_small_head_cache, head);
+ kmem_cache_free(net_hotdata.skb_small_head_cache, head);
else
kfree(head);
}
-static void skb_free_head(struct sk_buff *skb, bool napi_safe)
+static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
if (skb->head_frag) {
- if (skb_pp_recycle(skb, head, napi_safe))
+ if (skb_pp_recycle(skb, head))
return;
skb_free_frag(head);
} else {
@@ -999,15 +1082,12 @@ static void skb_free_head(struct sk_buff *skb, bool napi_safe)
}
}
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
- bool napi_safe)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
- if (skb->cloned &&
- atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &shinfo->dataref))
+ if (!skb_data_unref(skb, shinfo))
goto exit;
if (skb_zcopy(skb)) {
@@ -1019,13 +1099,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
}
for (i = 0; i < shinfo->nr_frags; i++)
- napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
free_head:
if (shinfo->frag_list)
kfree_skb_list_reason(shinfo->frag_list, reason);
- skb_free_head(skb, napi_safe);
+ skb_free_head(skb);
exit:
/* When we clone an SKB we copy the reycling bit. The pp_recycle
* bit is only set on the head though, so in order to avoid races
@@ -1048,7 +1128,7 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
return;
case SKB_FCLONE_ORIG:
@@ -1069,7 +1149,7 @@ static void kfree_skbmem(struct sk_buff *skb)
if (!refcount_dec_and_test(&fclones->fclone_ref))
return;
fastpath:
- kmem_cache_free(skbuff_fclone_cache, fclones);
+ kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones);
}
void skb_release_head_state(struct sk_buff *skb)
@@ -1086,12 +1166,11 @@ void skb_release_head_state(struct sk_buff *skb)
}
/* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
- bool napi_safe)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
{
skb_release_head_state(skb);
if (likely(skb->head))
- skb_release_data(skb, reason, napi_safe);
+ skb_release_data(skb, reason);
}
/**
@@ -1105,7 +1184,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
void __kfree_skb(struct sk_buff *skb)
{
- skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
+ skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
@@ -1162,11 +1241,11 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
return;
}
- skb_release_all(skb, reason, false);
+ skb_release_all(skb, reason);
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
- kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE,
sa->skb_array);
sa->skb_count = 0;
}
@@ -1191,7 +1270,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
}
if (sa.skb_count)
- kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array);
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1223,22 +1302,28 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
has_trans = skb_transport_header_was_set(skb);
printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
- "mac=(%d,%d) net=(%d,%d) trans=%d\n"
+ "mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n"
"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
- "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
- "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
+ "csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
+ "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n"
+ "priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n"
+ "encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n",
level, skb->len, headroom, skb_headlen(skb), tailroom,
has_mac ? skb->mac_header : -1,
has_mac ? skb_mac_header_len(skb) : -1,
+ skb->mac_len,
skb->network_header,
has_trans ? skb_network_header_len(skb) : -1,
has_trans ? skb->transport_header : -1,
sh->tx_flags, sh->nr_frags,
sh->gso_size, sh->gso_type, sh->gso_segs,
- skb->csum, skb->ip_summed, skb->csum_complete_sw,
- skb->csum_valid, skb->csum_level,
+ skb->csum, skb->csum_start, skb->csum_offset, skb->ip_summed,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level,
skb->hash, skb->sw_hash, skb->l4_hash,
- ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif,
+ skb->priority, skb->mark, skb->alloc_cpu, skb->vlan_all,
+ skb->encapsulation, skb->inner_protocol, skb->inner_mac_header,
+ skb->inner_network_header, skb->inner_transport_header);
if (dev)
printk("%sdev name=%s feat=%pNF\n",
@@ -1336,7 +1421,7 @@ EXPORT_SYMBOL(consume_skb);
void __consume_stateless_skb(struct sk_buff *skb)
{
trace_consume_skb(skb, __builtin_return_address(0));
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
kfree_skbmem(skb);
}
@@ -1353,9 +1438,9 @@ static void napi_skb_cache_put(struct sk_buff *skb)
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
kasan_mempool_unpoison_object(nc->skb_cache[i],
- kmem_cache_size(skbuff_cache));
+ kmem_cache_size(net_hotdata.skbuff_cache));
- kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
@@ -1363,7 +1448,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
{
- skb_release_all(skb, reason, true);
+ skb_release_all(skb, reason);
napi_skb_cache_put(skb);
}
@@ -1401,7 +1486,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- skb_release_all(skb, SKB_CONSUMED, !!budget);
+ skb_release_all(skb, SKB_CONSUMED);
napi_skb_cache_put(skb);
}
EXPORT_SYMBOL(napi_consume_skb);
@@ -1532,7 +1617,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
*/
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
{
- skb_release_all(dst, SKB_CONSUMED, false);
+ skb_release_all(dst, SKB_CONSUMED);
return __skb_clone(dst, src);
}
EXPORT_SYMBOL_GPL(skb_morph);
@@ -1600,7 +1685,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
- uarg->ubuf.callback = msg_zerocopy_callback;
+ uarg->ubuf.ops = &msg_zerocopy_ubuf_ops;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
@@ -1626,7 +1711,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
u32 bytelen, next;
/* there might be non MSG_ZEROCOPY users */
- if (uarg->callback != msg_zerocopy_callback)
+ if (uarg->ops != &msg_zerocopy_ubuf_ops)
return NULL;
/* realloc only when socket is locked (TCP, UDP cork),
@@ -1737,8 +1822,8 @@ release:
sock_put(sk);
}
-void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
- bool success)
+static void msg_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *uarg,
+ bool success)
{
struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
@@ -1747,7 +1832,6 @@ void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
if (refcount_dec_and_test(&uarg->refcnt))
__msg_zerocopy_callback(uarg_zc);
}
-EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
@@ -1757,10 +1841,15 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
uarg_to_msgzc(uarg)->len--;
if (have_uref)
- msg_zerocopy_callback(NULL, uarg, true);
+ msg_zerocopy_complete(NULL, uarg, true);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
+const struct ubuf_info_ops msg_zerocopy_ubuf_ops = {
+ .complete = msg_zerocopy_complete,
+};
+EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
+
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
@@ -1768,11 +1857,18 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct ubuf_info *orig_uarg = skb_zcopy(skb);
int err, orig_len = skb->len;
- /* An skb can only point to one uarg. This edge case happens when
- * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
- */
- if (orig_uarg && uarg != orig_uarg)
- return -EEXIST;
+ if (uarg->ops->link_skb) {
+ err = uarg->ops->link_skb(skb, uarg);
+ if (err)
+ return err;
+ } else {
+ /* An skb can only point to one uarg. This edge case happens
+ * when TCP appends to an skb, but zerocopy_realloc triggered
+ * a new alloc.
+ */
+ if (orig_uarg && uarg != orig_uarg)
+ return -EEXIST;
+ }
err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
@@ -1786,7 +1882,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return err;
}
- skb_zcopy_set(skb, uarg, NULL);
+ if (!uarg->ops->link_skb)
+ skb_zcopy_set(skb, uarg, NULL);
return skb->len - orig_len;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
@@ -1906,10 +2003,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags point to kernel buffers */
for (i = 0; i < new_frags - 1; i++) {
- __skb_fill_page_desc(skb, i, head, 0, psize);
+ __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
head = (struct page *)page_private(head);
}
- __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+ __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
+ d_off);
skb_shinfo(skb)->nr_frags = new_frags;
release:
@@ -1951,7 +2049,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- n = kmem_cache_alloc(skbuff_cache, gfp_mask);
+ n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask);
if (!n)
return NULL;
@@ -2014,11 +2112,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
- int headerlen = skb_headroom(skb);
- unsigned int size = skb_end_offset(skb) + skb->data_len;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ struct sk_buff *n;
+ unsigned int size;
+ int headerlen;
+
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+ headerlen = skb_headroom(skb);
+ size = skb_end_offset(skb) + skb->data_len;
+ n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
return NULL;
@@ -2163,9 +2267,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
} else {
- skb_free_head(skb, false);
+ skb_free_head(skb);
}
off = (data + nhead) - skb->head;
@@ -2346,12 +2450,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask, skb_alloc_rx_flag(skb),
- NUMA_NO_NODE);
- int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
+ struct sk_buff *n;
+ int oldheadroom;
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+
+ oldheadroom = skb_headroom(skb);
+ n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
if (!n)
return NULL;
@@ -3647,7 +3756,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
if (plen) {
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- __skb_fill_page_desc(to, 0, page, offset, plen);
+ __skb_fill_netmem_desc(to, 0, page_to_netmem(page),
+ offset, plen);
get_page(page);
j = 1;
len -= plen;
@@ -4889,7 +4999,7 @@ static void skb_extensions_init(void) {}
void __init skb_init(void)
{
- skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
+ net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|
@@ -4897,7 +5007,7 @@ void __init skb_init(void)
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
- skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
@@ -4906,7 +5016,7 @@ void __init skb_init(void)
* struct skb_shared_info is located at the end of skb->head,
* and should not be copied to/from user.
*/
- skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
+ net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
SKB_SMALL_HEAD_CACHE_SIZE,
0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC,
@@ -5779,7 +5889,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
if (head_stolen) {
skb_release_head_state(skb);
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
} else {
__kfree_skb(skb);
}
@@ -6465,12 +6575,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
} else {
/* we can reuse existing recount- all we did was
* relocate values
*/
- skb_free_head(skb, false);
+ skb_free_head(skb);
}
skb->head = data;
@@ -6605,7 +6715,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_kfree_head(data, size);
return -ENOMEM;
}
- skb_release_data(skb, SKB_CONSUMED, false);
+ skb_release_data(skb, SKB_CONSUMED);
skb->head = data;
skb->head_frag = 0;
@@ -6885,6 +6995,19 @@ free_now:
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
+/* Free @skb right away. An skb whose fclone state is anything other than
+ * SKB_FCLONE_UNAVAILABLE goes through __kfree_skb(); every other skb is
+ * handed to __napi_kfree_skb() with bottom halves disabled around the call.
+ */
+static void kfree_skb_napi_cache(struct sk_buff *skb)
+{
+	if (skb->fclone == SKB_FCLONE_UNAVAILABLE) {
+		local_bh_disable();
+		__napi_kfree_skb(skb, SKB_CONSUMED);
+		local_bh_enable();
+	} else {
+		/* fclone'd skb: this case is not handled by the NAPI path */
+		__kfree_skb(skb);
+	}
+}
+
/**
* skb_attempt_defer_free - queue skb for remote freeing
* @skb: buffer
@@ -6900,10 +7023,10 @@ void skb_attempt_defer_free(struct sk_buff *skb)
unsigned int defer_max;
bool kick;
- if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
- !cpu_online(cpu) ||
- cpu == raw_smp_processor_id()) {
-nodefer: __kfree_skb(skb);
+ if (cpu == raw_smp_processor_id() ||
+ WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+ !cpu_online(cpu)) {
+nodefer: kfree_skb_napi_cache(skb);
return;
}
@@ -6911,7 +7034,7 @@ nodefer: __kfree_skb(skb);
DEBUG_NET_WARN_ON_ONCE(skb->destructor);
sd = &per_cpu(softnet_data, cpu);
- defer_max = READ_ONCE(sysctl_skb_defer_max);
+ defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
if (READ_ONCE(sd->defer_count) >= defer_max)
goto nodefer;
@@ -6929,8 +7052,8 @@ nodefer: __kfree_skb(skb);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).
*/
- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
- smp_call_function_single_async(cpu, &sd->defer_csd);
+ if (unlikely(kick))
+ kick_defer_list_purge(sd, cpu);
}
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
@@ -6963,7 +7086,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
ssize_t maxsize, gfp_t gfp)
{
- size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+ size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
ssize_t spliced = 0, ret = 0;
unsigned int i;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4d75ef9d24..bbf40b9997 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (copy)
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
@@ -1226,11 +1227,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock) {
- read_lock_bh(&sk->sk_callback_lock);
+ if (psock)
sk_psock_data_ready(sk, psock);
- read_unlock_bh(&sk->sk_callback_lock);
- }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 9cf404e803..100e975073 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -127,6 +127,7 @@
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
@@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
int sysctl_tstamp_allow_data __read_mostly = 1;
@@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
@@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
@@ -2053,8 +2053,9 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
- memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
- prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+ unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+ prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
+ /* alloc is larger than struct, see sk_prot_alloc() */);
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
@@ -2525,13 +2526,12 @@ EXPORT_SYMBOL(skb_set_owner_w);
static bool can_skb_orphan_partial(const struct sk_buff *skb)
{
-#ifdef CONFIG_TLS_DEVICE
/* Drivers depend on in-order delivery for crypto offload,
* partial orphan breaks out-of-order-OK logic.
*/
- if (skb->decrypted)
+ if (skb_is_decrypted(skb))
return false;
-#endif
+
return (skb->destructor == sock_wfree ||
(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
}
@@ -2583,8 +2583,18 @@ EXPORT_SYMBOL(sock_efree);
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
- if (sk_is_refcounted(skb->sk))
- sock_gen_put(skb->sk);
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk))
+ return;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ inet_reqsk(sk)->rsk_listener = NULL;
+ reqsk_free(inet_reqsk(sk));
+ return;
+ }
+
+ sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
@@ -3231,8 +3241,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
}
EXPORT_SYMBOL(sock_no_socketpair);
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int sock_no_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
return -EOPNOTSUPP;
}
@@ -3327,7 +3337,7 @@ static void sock_def_error_report(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, EPOLLERR);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
+ sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
}
@@ -3342,7 +3352,7 @@ void sock_def_readable(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -3362,7 +3372,7 @@ static void sock_def_write_space(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@@ -3387,7 +3397,7 @@ static void sock_def_write_space_wfree(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
}
@@ -3732,6 +3742,9 @@ void sk_common_release(struct sock *sk)
sk->sk_prot->unhash(sk);
+ if (sk->sk_socket)
+ sk->sk_socket->sk = NULL;
+
/*
* In this point socket cannot receive new packets, but it is possible
* that some packets are in flight because some CPU runs receiver and
@@ -4224,3 +4237,65 @@ int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
+
+/* Layout assertions for struct sock, registered through core_initcall().
+ * Each CACHELINE_ASSERT_GROUP_MEMBER() line names one field together with
+ * the cacheline group it is expected to live in; each field is listed once.
+ * Always returns 0.
+ */
+static int __init sock_struct_check(void)
+{
+	/* sock_write_rx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);
+
+	/* sock_read_rx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);
+
+	/* sock_read_rxtx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
+
+	/* sock_write_rxtx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);
+
+	/* sock_write_tx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
+
+	/* sock_read_tx */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
+	return 0;
+}
+
+core_initcall(sock_struct_check);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c53b731f2d..6541228380 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -16,9 +16,10 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
-static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
-static DEFINE_MUTEX(sock_diag_table_mutex);
+static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];
+
+static struct sock_diag_inet_compat __rcu *inet_rcv_compat;
+
static struct workqueue_struct *broadcast_wq;
DEFINE_COOKIE(sock_cookie);
@@ -122,6 +123,24 @@ static size_t sock_diag_nlmsg_size(void)
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
+/* Fetch the handler registered for @family under rcu_read_lock() and take a
+ * reference on its owning module. Returns NULL when no handler is
+ * registered or try_module_get() fails; a non-NULL result is released with
+ * sock_diag_unlock_handler().
+ */
+static const struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+	const struct sock_diag_handler *hndl;
+
+	rcu_read_lock();
+	hndl = rcu_dereference(sock_diag_handlers[family]);
+	if (hndl) {
+		/* a handler whose module cannot be pinned is treated as absent */
+		if (!try_module_get(hndl->owner))
+			hndl = NULL;
+	}
+	rcu_read_unlock();
+	return hndl;
+}
+
+/* Drop the module reference held on @handler's owner; the counterpart of
+ * sock_diag_lock_handler().
+ */
+static void sock_diag_unlock_handler(const struct sock_diag_handler *handler)
+{
+	struct module *owner = handler->owner;
+
+	module_put(owner);
+}
+
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
struct broadcast_sk *bsk =
@@ -138,12 +157,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work)
if (!skb)
goto out;
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[sk->sk_family];
- if (hndl && hndl->get_info)
- err = hndl->get_info(skb, sk);
- mutex_unlock(&sock_diag_table_mutex);
-
+ hndl = sock_diag_lock_handler(sk->sk_family);
+ if (hndl) {
+ if (hndl->get_info)
+ err = hndl->get_info(skb, sk);
+ sock_diag_unlock_handler(hndl);
+ }
if (!err)
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
GFP_KERNEL);
@@ -166,51 +185,45 @@ void sock_diag_broadcast_destroy(struct sock *sk)
queue_work(broadcast_wq, &bsk->work);
}
-void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = fn;
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
-void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = NULL;
- mutex_unlock(&sock_diag_table_mutex);
+ const struct sock_diag_inet_compat *old;
+
+ old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ NULL);
+ WARN_ON_ONCE(old != ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
int sock_diag_register(const struct sock_diag_handler *hndl)
{
- int err = 0;
+ int family = hndl->family;
- if (hndl->family >= AF_MAX)
+ if (family >= AF_MAX)
return -EINVAL;
- mutex_lock(&sock_diag_table_mutex);
- if (sock_diag_handlers[hndl->family])
- err = -EBUSY;
- else
- WRITE_ONCE(sock_diag_handlers[hndl->family], hndl);
- mutex_unlock(&sock_diag_table_mutex);
-
- return err;
+ return !cmpxchg((const struct sock_diag_handler **)
+ &sock_diag_handlers[family],
+ NULL, hndl) ? 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(sock_diag_register);
-void sock_diag_unregister(const struct sock_diag_handler *hnld)
+void sock_diag_unregister(const struct sock_diag_handler *hndl)
{
- int family = hnld->family;
+ int family = hndl->family;
if (family >= AF_MAX)
return;
- mutex_lock(&sock_diag_table_mutex);
- BUG_ON(sock_diag_handlers[family] != hnld);
- WRITE_ONCE(sock_diag_handlers[family], NULL);
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((const struct sock_diag_handler **)&sock_diag_handlers[family],
+ NULL);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
@@ -227,20 +240,20 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
- if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL)
+ if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family]))
sock_load_diag_module(req->sdiag_family, 0);
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[req->sdiag_family];
+ hndl = sock_diag_lock_handler(req->sdiag_family);
if (hndl == NULL)
- err = -ENOENT;
- else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
+ return -ENOENT;
+
+ if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
err = hndl->destroy(skb, nlh);
else
err = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ sock_diag_unlock_handler(hndl);
return err;
}
@@ -248,20 +261,27 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ const struct sock_diag_inet_compat *ptr;
int ret;
switch (nlh->nlmsg_type) {
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
- if (inet_rcv_compat == NULL)
+
+ if (!rcu_access_pointer(inet_rcv_compat))
sock_load_diag_module(AF_INET, 0);
- mutex_lock(&sock_diag_table_mutex);
- if (inet_rcv_compat != NULL)
- ret = inet_rcv_compat(skb, nlh);
- else
- ret = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ rcu_read_lock();
+ ptr = rcu_dereference(inet_rcv_compat);
+ if (ptr && !try_module_get(ptr->owner))
+ ptr = NULL;
+ rcu_read_unlock();
+
+ ret = -EOPNOTSUPP;
+ if (ptr) {
+ ret = ptr->fn(skb, nlh);
+ module_put(ptr->owner);
+ }
return ret;
case SOCK_DIAG_BY_FAMILY:
@@ -272,13 +292,9 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
-static DEFINE_MUTEX(sock_diag_mutex);
-
static void sock_diag_rcv(struct sk_buff *skb)
{
- mutex_lock(&sock_diag_mutex);
netlink_rcv_skb(skb, &sock_diag_rcv_msg);
- mutex_unlock(&sock_diag_mutex);
}
static int sock_diag_bind(struct net *net, int group)
@@ -286,12 +302,12 @@ static int sock_diag_bind(struct net *net, int group)
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
- if (!READ_ONCE(sock_diag_handlers[AF_INET]))
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET]))
sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
- if (!READ_ONCE(sock_diag_handlers[AF_INET6]))
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET6]))
sock_load_diag_module(AF_INET6, 0);
break;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8598466a38..d3dbb92153 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,8 +24,16 @@ struct bpf_stab {
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+/* This mutex
+ * - serializes prog/link attach/detach against link prog update, and
+ * - serializes releasing the map held by a bpf_link against accessing it.
+ * A single global mutex is used since contention is expected to be low.
+ */
+static DEFINE_MUTEX(sockmap_mutex);
+
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which);
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which);
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
@@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
fdput(f);
return ret;
}
@@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
goto put_prog;
}
- ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
put_prog:
bpf_prog_put(prog);
put_map:
@@ -411,9 +423,6 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -936,9 +945,6 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
- if (irqs_disabled())
- return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
@@ -1460,55 +1466,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
- u32 which)
+static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ struct bpf_link ***plink, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **cur_pprog;
+ struct bpf_link **cur_plink;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- *pprog = &progs->msg_parser;
+ cur_pprog = &progs->msg_parser;
+ cur_plink = &progs->msg_parser_link;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- *pprog = &progs->stream_parser;
+ cur_pprog = &progs->stream_parser;
+ cur_plink = &progs->stream_parser_link;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- *pprog = &progs->stream_verdict;
+ cur_pprog = &progs->stream_verdict;
+ cur_plink = &progs->stream_verdict_link;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- *pprog = &progs->skb_verdict;
+ cur_pprog = &progs->skb_verdict;
+ cur_plink = &progs->skb_verdict_link;
break;
default:
return -EOPNOTSUPP;
}
+ *pprog = cur_pprog;
+ if (plink)
+ *plink = cur_plink;
return 0;
}
+/* Handle the following four cases:
+ * prog_attach: prog != NULL, old == NULL, link == NULL
+ * prog_detach: prog == NULL, old != NULL, link == NULL
+ * link_attach: prog != NULL, old == NULL, link != NULL
+ * link_detach: prog == NULL, old != NULL, link != NULL
+ */
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which)
{
struct bpf_prog **pprog;
+ struct bpf_link **plink;
int ret;
- ret = sock_map_prog_lookup(map, &pprog, which);
+ ret = sock_map_prog_link_lookup(map, &pprog, &plink, which);
if (ret)
return ret;
- if (old)
- return psock_replace_prog(pprog, prog, old);
+ /* for prog_attach/prog_detach/link_attach, return error if a bpf_link
+ * exists for that prog.
+ */
+ if ((!link || prog) && *plink)
+ return -EBUSY;
- psock_set_prog(pprog, prog);
- return 0;
+ if (old) {
+ ret = psock_replace_prog(pprog, prog, old);
+ if (!ret)
+ *plink = NULL;
+ } else {
+ psock_set_prog(pprog, prog);
+ if (link)
+ *plink = link;
+ }
+
+ return ret;
}
int sock_map_bpf_prog_query(const union bpf_attr *attr,
@@ -1533,7 +1568,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
rcu_read_lock();
- ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type);
if (ret)
goto end;
@@ -1639,19 +1674,23 @@ void sock_map_close(struct sock *sk, long timeout)
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock_get(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- saved_close = READ_ONCE(sk->sk_prot)->close;
- } else {
+ psock = sk_psock(sk);
+ if (likely(psock)) {
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ goto no_psock;
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
+ } else {
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+no_psock:
+ rcu_read_unlock();
+ release_sock(sk);
}
/* Make sure we do not recurse. This is a bug.
@@ -1663,6 +1702,196 @@ void sock_map_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL_GPL(sock_map_close);
+struct sockmap_link { /* per-link state for BPF_LINK_TYPE_SOCKMAP links */
+ struct bpf_link link; /* embedded generic link; callbacks recover the sockmap_link via container_of() */
+ struct bpf_map *map; /* map the link was created against; set to NULL by sock_map_link_release() */
+ enum bpf_attach_type attach_type; /* attach type handed to the sock_map prog update/lookup helpers */
+};
+
+/* bpf_link release callback. Under sockmap_mutex, remove link->prog from
+ * the map recorded in the link and drop the link's map reference. A link
+ * whose map pointer is already NULL is left untouched.
+ */
+static void sock_map_link_release(struct bpf_link *link)
+{
+	struct sockmap_link *sm_link = container_of(link, struct sockmap_link, link);
+
+	mutex_lock(&sockmap_mutex);
+	if (sm_link->map) {
+		WARN_ON_ONCE(sock_map_prog_update(sm_link->map, NULL, link->prog, link,
+						  sm_link->attach_type));
+
+		bpf_map_put_with_uref(sm_link->map);
+		/* a NULL map marks the link as no longer attached */
+		sm_link->map = NULL;
+	}
+	mutex_unlock(&sockmap_mutex);
+}
+
+/* bpf_link detach callback: runs the same teardown as
+ * sock_map_link_release() and always reports success.
+ */
+static int sock_map_link_detach(struct bpf_link *link)
+{
+	sock_map_link_release(link);
+
+	return 0;
+}
+
+/* bpf_link dealloc callback: free the sockmap_link that embeds @link. */
+static void sock_map_link_dealloc(struct bpf_link *link)
+{
+	struct sockmap_link *sm_link = container_of(link, struct sockmap_link, link);
+
+	kfree(sm_link);
+}
+
+/* bpf_link update_prog callback: swap the program attached through @link
+ * for @prog.
+ *
+ * Handle the following two cases:
+ * case 1: link != NULL, prog != NULL, old != NULL
+ * case 2: link != NULL, prog != NULL, old == NULL
+ *
+ * Returns 0 on success, -EPERM if @old is given but is not link->prog,
+ * -EINVAL if @prog differs from link->prog in type or expected attach type,
+ * -ENOLINK if the link no longer has a map, -EBUSY if the map slot holds a
+ * different link, or the error from the slot lookup / program replacement.
+ */
+static int sock_map_link_update_prog(struct bpf_link *link,
+				     struct bpf_prog *prog,
+				     struct bpf_prog *old)
+{
+	const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+	struct bpf_prog **pprog, *old_link_prog;
+	struct bpf_link **plink;
+	int ret = 0;
+
+	mutex_lock(&sockmap_mutex);
+
+	/* If old prog is not NULL, ensure old prog is the same as link->prog. */
+	if (old && link->prog != old) {
+		ret = -EPERM;
+		goto out;
+	}
+	/* Ensure link->prog has the same type/attach_type as the new prog. */
+	if (link->prog->type != prog->type ||
+	    link->prog->expected_attach_type != prog->expected_attach_type) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* sock_map_link_release() sets ->map to NULL once the link has been
+	 * detached. There is nothing left to update in that state, and a NULL
+	 * map must not be passed to the lookup below.
+	 */
+	if (!sockmap_link->map) {
+		ret = -ENOLINK;
+		goto out;
+	}
+
+	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
+					sockmap_link->attach_type);
+	if (ret)
+		goto out;
+
+	/* return error if the stored bpf_link does not match the incoming bpf_link. */
+	if (link != *plink) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (old) {
+		ret = psock_replace_prog(pprog, prog, old);
+		if (ret)
+			goto out;
+	} else {
+		psock_set_prog(pprog, prog);
+	}
+
+	/* The link takes its own reference on the new prog and gives up the
+	 * one it held on the prog it pointed to before.
+	 */
+	bpf_prog_inc(prog);
+	old_link_prog = xchg(&link->prog, prog);
+	bpf_prog_put(old_link_prog);
+
+out:
+	mutex_unlock(&sockmap_mutex);
+	return ret;
+}
+
+/* Return the id of the map held by @sockmap_link, or 0 when the link has no
+ * map. The map pointer is read under sockmap_mutex.
+ */
+static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link)
+{
+	const struct bpf_map *map;
+	u32 id;
+
+	mutex_lock(&sockmap_mutex);
+	map = sockmap_link->map;
+	id = map ? map->id : 0;
+	mutex_unlock(&sockmap_mutex);
+
+	return id;
+}
+
+/* bpf_link fill_link_info callback: report the link's map id (0 when the
+ * link has no map) and attach type in info->sockmap. Always returns 0.
+ */
+static int sock_map_link_fill_info(const struct bpf_link *link,
+				   struct bpf_link_info *info)
+{
+	const struct sockmap_link *sm_link;
+
+	sm_link = container_of(link, struct sockmap_link, link);
+	info->sockmap.attach_type = sm_link->attach_type;
+	info->sockmap.map_id = sock_map_link_get_map_id(sm_link);
+	return 0;
+}
+
+/* bpf_link show_fdinfo callback: print the link's map id (0 when the link
+ * has no map) and attach type to @seq.
+ */
+static void sock_map_link_show_fdinfo(const struct bpf_link *link,
+				      struct seq_file *seq)
+{
+	const struct sockmap_link *sm_link;
+
+	sm_link = container_of(link, struct sockmap_link, link);
+	seq_printf(seq, "map_id:\t%u\n", sock_map_link_get_map_id(sm_link));
+	seq_printf(seq, "attach_type:\t%u\n", sm_link->attach_type);
+}
+
+static const struct bpf_link_ops sock_map_link_ops = { /* callbacks passed to bpf_link_init() by sock_map_link_create() */
+ .release = sock_map_link_release,
+ .dealloc = sock_map_link_dealloc,
+ .detach = sock_map_link_detach, /* same teardown as .release, returns 0 */
+ .update_prog = sock_map_link_update_prog,
+ .fill_link_info = sock_map_link_fill_info,
+ .show_fdinfo = sock_map_link_show_fdinfo,
+};
+
+/* Create a BPF_LINK_TYPE_SOCKMAP link that attaches @prog to the sockmap or
+ * sockhash referenced by attr->link_create.target_fd.
+ *
+ * Returns the result of bpf_link_settle() on success. On failure after the
+ * map lookup, the map reference is dropped and an error is returned:
+ * -EINVAL for a map of another type, -ENOMEM when the link cannot be
+ * allocated, or the error from bpf_link_prime() / sock_map_prog_update().
+ * Non-zero link_create.flags are rejected with -EINVAL, and a failed map
+ * lookup returns its own error.
+ */
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	enum bpf_attach_type attach_type;
+	struct bpf_link_primer primer;
+	struct sockmap_link *sm_link;
+	struct bpf_map *map;
+	int ret;
+
+	if (attr->link_create.flags)
+		return -EINVAL;
+
+	map = bpf_map_get_with_uref(attr->link_create.target_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+	case BPF_MAP_TYPE_SOCKHASH:
+		break;
+	default:
+		ret = -EINVAL;
+		goto put_map;
+	}
+
+	sm_link = kzalloc(sizeof(*sm_link), GFP_USER);
+	if (!sm_link) {
+		ret = -ENOMEM;
+		goto put_map;
+	}
+
+	attach_type = attr->link_create.attach_type;
+	bpf_link_init(&sm_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+	sm_link->map = map;
+	sm_link->attach_type = attach_type;
+
+	ret = bpf_link_prime(&sm_link->link, &primer);
+	if (ret) {
+		/* not primed: the allocation is still ours to free */
+		kfree(sm_link);
+		goto put_map;
+	}
+
+	mutex_lock(&sockmap_mutex);
+	ret = sock_map_prog_update(map, prog, NULL, &sm_link->link, attach_type);
+	mutex_unlock(&sockmap_mutex);
+	if (ret) {
+		bpf_link_cleanup(&primer);
+		goto put_map;
+	}
+
+	/* Take an extra reference on the prog: when it is later replaced via
+	 * psock_replace_prog() or psock_set_prog(), its refcnt is decreased.
+	 *
+	 * Strictly speaking the bpf_link already holds a reference, so this
+	 * one is not required; it is taken anyway to keep the replace/set
+	 * paths simple.
+	 */
+	bpf_prog_inc(prog);
+
+	return bpf_link_settle(&primer);
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return ret;
+}
+
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 986f15e5d6..c9fb9ad874 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,6 +23,9 @@
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
+#include <net/rps.h>
#include "dev.h"
@@ -138,7 +141,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ orig_sock_table = rcu_dereference_protected(
+ net_hotdata.rps_sock_flow_table,
lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
@@ -159,7 +163,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_unlock(&sock_flow_mutex);
return -ENOMEM;
}
- rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
+ net_hotdata.rps_cpu_mask =
+ roundup_pow_of_two(nr_cpu_ids) - 1;
sock_table->mask = size - 1;
} else
sock_table = orig_sock_table;
@@ -170,7 +175,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
sock_table = NULL;
if (sock_table != orig_sock_table) {
- rcu_assign_pointer(rps_sock_flow_table, sock_table);
+ rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
+ sock_table);
if (sock_table) {
static_branch_inc(&rps_needed);
static_branch_inc(&rfs_needed);
@@ -300,8 +306,8 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
weight = READ_ONCE(weight_p);
- WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
- WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
}
mutex_unlock(&dev_weight_mutex);
@@ -410,7 +416,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "mem_pcpu_rsv",
- .data = &sysctl_mem_pcpu_rsv,
+ .data = &net_hotdata.sysctl_mem_pcpu_rsv,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -439,7 +445,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_max_backlog",
- .data = &netdev_max_backlog,
+ .data = &net_hotdata.max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -498,7 +504,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_tstamp_prequeue",
- .data = &netdev_tstamp_prequeue,
+ .data = &net_hotdata.tstamp_prequeue,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -576,7 +582,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_budget",
- .data = &netdev_budget,
+ .data = &net_hotdata.netdev_budget,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -590,7 +596,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "max_skb_frags",
- .data = &sysctl_max_skb_frags,
+ .data = &net_hotdata.sysctl_max_skb_frags,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -599,7 +605,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_budget_usecs",
- .data = &netdev_budget_usecs,
+ .data = &net_hotdata.netdev_budget_usecs,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -632,7 +638,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "gro_normal_batch",
- .data = &gro_normal_batch,
+ .data = &net_hotdata.gro_normal_batch,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -649,13 +655,12 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "skb_defer_max",
- .data = &sysctl_skb_defer_max,
+ .data = &net_hotdata.sysctl_skb_defer_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
- { }
};
static struct ctl_table netns_core_table[] = {
@@ -692,7 +697,6 @@ static struct ctl_table netns_core_table[] = {
.extra2 = SYSCTL_ONE,
.proc_handler = proc_dou8vec_minmax,
},
- { }
};
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
@@ -710,20 +714,21 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl, *tmp;
+ size_t table_size = ARRAY_SIZE(netns_core_table);
+ struct ctl_table *tbl;
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
+ int i;
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
- for (tmp = tbl; tmp->procname; tmp++)
- tmp->data += (char *)net - (char *)&init_net;
+ for (i = 0; i < table_size; ++i)
+ tbl[i].data += (char *)net - (char *)&init_net;
}
- net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl,
- ARRAY_SIZE(netns_core_table));
+ net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
if (net->core.sysctl_hdr == NULL)
goto err_reg;
@@ -738,7 +743,7 @@ err_dup:
static __net_exit void sysctl_core_net_exit(struct net *net)
{
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->core.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->core.sysctl_hdr);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4869c1c2d8..bcc5551c64 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -16,6 +16,7 @@
#include <linux/bug.h>
#include <net/page_pool/helpers.h>
+#include <net/hotdata.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
@@ -75,7 +76,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
/* Allow this ID to be reused */
- ida_simple_remove(&mem_id_pool, xa->mem.id);
+ ida_free(&mem_id_pool, xa->mem.id);
kfree(xa);
}
@@ -126,10 +127,8 @@ void xdp_unreg_mem_model(struct xdp_mem_info *mem)
return;
if (type == MEM_TYPE_PAGE_POOL) {
- rcu_read_lock();
- xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+ xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);
- rcu_read_unlock();
}
}
EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
@@ -242,7 +241,7 @@ static int __mem_id_cyclic_get(gfp_t gfp)
int id;
again:
- id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+ id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp);
if (id < 0) {
if (id == -ENOSPC) {
/* Cyclic allocator, reset next id */
@@ -294,10 +293,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
mutex_lock(&mem_id_lock);
ret = __mem_id_init_hash_table();
mutex_unlock(&mem_id_lock);
- if (ret < 0) {
- WARN_ON(1);
+ if (ret < 0)
return ERR_PTR(ret);
- }
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
@@ -317,7 +314,7 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
- ida_simple_remove(&mem_id_pool, mem->id);
+ ida_free(&mem_id_pool, mem->id);
mem->id = 0;
errno = PTR_ERR(ptr);
goto err;
@@ -589,7 +586,7 @@ EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
- n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs);
+ n_skb = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, n_skb, skbs);
if (unlikely(!n_skb))
return -ENOMEM;
@@ -658,7 +655,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -771,11 +768,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
__bpf_kfunc_end_defs();
-BTF_SET8_START(xdp_metadata_kfunc_ids)
+BTF_KFUNCS_START(xdp_metadata_kfunc_ids)
#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
-BTF_SET8_END(xdp_metadata_kfunc_ids)
+BTF_KFUNCS_END(xdp_metadata_kfunc_ids)
static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index c4bbac9974..1cba001bb4 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -376,15 +376,11 @@ EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
int __init dccp_ackvec_init(void)
{
- dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
- sizeof(struct dccp_ackvec), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_slab == NULL)
goto out_err;
- dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
- sizeof(struct dccp_ackvec_record),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_record_slab == NULL)
goto out_destroy_slab;
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index a3eeb84d16..e3d388c33d 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -13,7 +13,7 @@ config IP_DCCP_CCID2_DEBUG
config IP_DCCP_CCID3
bool "CCID-3 (TCP-Friendly)"
- def_bool y if (IP_DCCP = y || IP_DCCP = m)
+ default IP_DCCP = y || IP_DCCP = m
help
CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 4d9823d6dc..d6b30700af 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -353,6 +353,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* @sk: socket to perform estimator on
+ * @mrtt: measured RTT
*
* This code is almost identical with TCP's tcp_rtt_estimator(), since
* - it has a higher sampling frequency (recommended by RFC 1323),
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8a82c5a2c5..f5019d95c3 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler dccp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = dccp_diag_dump,
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 44b033fe1e..5926159a6f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -24,6 +24,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/netns/generic.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -521,7 +522,8 @@ out:
return err;
}
-static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
int err;
const struct iphdr *rxiph;
@@ -655,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v4_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
@@ -706,7 +711,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
kfree_skb(skb);
return 0;
}
@@ -869,7 +874,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -909,7 +914,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
@@ -1039,7 +1044,7 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
{
- inet_twsk_purge(&dccp_hashinfo, AF_INET);
+ inet_twsk_purge(&dccp_hashinfo);
}
static struct pernet_operations dccp_v4_ops = {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ded07e09f8..da5dba120b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/secure_seq.h>
#include <net/netns/generic.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include "dccp.h"
#include "ipv6.h"
@@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
@@ -398,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
@@ -656,7 +661,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -762,7 +767,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -801,7 +806,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
@@ -1119,15 +1124,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
- .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 64d805b27a..251a57cf58 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
inet_csk_reqsk_queue_drop(sk, req);
out:
diff --git a/net/dccp/output.c b/net/dccp/output.c
index fd2eb148d2..5c2e24f3c3 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -204,7 +204,7 @@ void dccp_write_space(struct sock *sk)
wake_up_interruptible(&wq->wait);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index ee8d4f5afa..3fc474d6e5 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -90,8 +90,6 @@ static struct ctl_table dccp_default_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
-
- { }
};
static struct ctl_table_header *dccp_table_header;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 7f0b093208..f49cd83f19 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -314,7 +314,7 @@ static void devlink_release(struct work_struct *work)
mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
put_device(devlink->dev);
- kfree(devlink);
+ kvfree(devlink);
}
void devlink_put(struct devlink *devlink)
@@ -420,7 +420,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (!devlink_reload_actions_valid(ops))
return NULL;
- devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+ devlink = kvzalloc(struct_size(devlink, priv, priv_size), GFP_KERNEL);
if (!devlink)
return NULL;
@@ -455,7 +455,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
return devlink;
err_xa_alloc:
- kfree(devlink);
+ kvfree(devlink);
return NULL;
}
EXPORT_SYMBOL_GPL(devlink_alloc_ns);
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 19dbf54074..13c73f50da 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -1202,23 +1202,19 @@ static void __devlink_compat_running_version(struct devlink *devlink,
if (err)
goto free_msg;
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ (void *)msg->data, msg->len, rem) {
const struct nlattr *kv;
int rem_kv;
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
+ nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ nlattr, rem_kv) {
strlcat(buf, nla_data(kv), len);
strlcat(buf, " ", len);
}
}
free_msg:
- nlmsg_free(msg);
+ nlmsg_consume(msg);
}
void devlink_compat_running_version(struct devlink *devlink,
diff --git a/net/devlink/param.c b/net/devlink/param.c
index 22bc3b5005..dcf0d1cceb 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -158,11 +158,12 @@ static int devlink_param_get(struct devlink *devlink,
static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
if (!param->set)
return -EOPNOTSUPP;
- return param->set(devlink, param->id, ctx);
+ return param->set(devlink, param->id, ctx, extack);
}
static int
@@ -571,7 +572,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return -EOPNOTSUPP;
ctx.val = value;
ctx.cmode = cmode;
- err = devlink_param_set(devlink, param, &ctx);
+ err = devlink_param_set(devlink, param, &ctx, info->extack);
if (err)
return err;
}
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 118d130d2a..be9158b445 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -16,6 +16,7 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
[DEVLINK_PORT_FN_ATTR_CAPS] =
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+ [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 },
};
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
@@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
return 0;
}
+static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ u32 max_io_eqs;
+ int err;
+
+ if (!port->ops->port_fn_max_io_eqs_get)
+ return 0;
+
+ err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, max_io_eqs);
+ if (err)
+ return err;
+ *msg_updated = true;
+ return 0;
+}
+
int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
{
if (devlink_nl_put_handle(msg, devlink_port->devlink))
@@ -410,6 +435,18 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
}
static int
+devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_io_eqs;
+
+ max_io_eqs = nla_get_u32(attr);
+ return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port,
+ max_io_eqs, extack);
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -430,6 +467,9 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
err = devlink_rel_devlink_handle_put(msg, port->devlink,
port->rel_index,
DEVLINK_PORT_FN_ATTR_DEVLINK,
@@ -726,6 +766,12 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
}
}
}
+ if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] &&
+ !ops->port_fn_max_io_eqs_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS],
+ "Function does not support max_io_eqs setting");
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -761,6 +807,13 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
+ attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS];
+ if (attr) {
+ err = devlink_port_fn_max_io_eqs_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c
index 431bf52290..0aac887d00 100644
--- a/net/dsa/devlink.c
+++ b/net/dsa/devlink.c
@@ -194,7 +194,8 @@ int dsa_devlink_param_get(struct devlink *dl, u32 id,
EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
int dsa_devlink_param_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dsa_devlink_to_ds(dl);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ac7be864e8..12521a7d40 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/of.h>
-#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <net/dsa_stubs.h>
#include <net/sch_generic.h>
@@ -626,7 +625,6 @@ static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds)
static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct device_node *dn;
int err;
if (ds->setup)
@@ -666,10 +664,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dsa_user_mii_bus_init(ds);
- dn = of_get_child_by_name(ds->dev->of_node, "mdio");
-
- err = of_mdiobus_register(ds->user_mii_bus, dn);
- of_node_put(dn);
+ err = mdiobus_register(ds->user_mii_bus);
if (err < 0)
goto free_user_mii_bus;
}
@@ -1510,6 +1505,16 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (!ds->num_ports)
return -EINVAL;
+ if (ds->phylink_mac_ops) {
+ if (ds->ops->phylink_mac_select_pcs ||
+ ds->ops->phylink_mac_prepare ||
+ ds->ops->phylink_mac_config ||
+ ds->ops->phylink_mac_finish ||
+ ds->ops->phylink_mac_link_down ||
+ ds->ops->phylink_mac_link_up)
+ return -EINVAL;
+ }
+
if (np) {
err = dsa_switch_parse_of(ds, np);
if (err)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c42dac8767..9a249d4ac3 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1535,30 +1535,11 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
-{
- struct device_node *phy_dn;
- struct phy_device *phydev;
-
- phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (!phy_dn)
- return NULL;
-
- phydev = of_phy_find_device(phy_dn);
- if (!phydev) {
- of_node_put(phy_dn);
- return ERR_PTR(-EPROBE_DEFER);
- }
-
- of_node_put(phy_dn);
- return phydev;
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
struct dsa_switch *ds = dp->ds;
@@ -1572,7 +1553,7 @@ static int dsa_port_phylink_mac_prepare(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
int err = 0;
@@ -1587,7 +1568,7 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->phylink_mac_config)
@@ -1600,7 +1581,7 @@ static int dsa_port_phylink_mac_finish(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
int err = 0;
@@ -1615,18 +1596,11 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct phy_device *phydev = NULL;
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (dsa_port_is_user(dp))
- phydev = dp->user->phydev;
-
- if (!ds->ops->phylink_mac_link_down) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_down)
return;
- }
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
@@ -1638,14 +1612,11 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
int speed, int duplex,
bool tx_pause, bool rx_pause)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->phylink_mac_link_up) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_up)
return;
- }
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
speed, duplex, tx_pause, rx_pause);
@@ -1662,6 +1633,7 @@ static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
int dsa_port_phylink_create(struct dsa_port *dp)
{
+ const struct phylink_mac_ops *mac_ops;
struct dsa_switch *ds = dp->ds;
phy_interface_t mode;
struct phylink *pl;
@@ -1685,8 +1657,12 @@ int dsa_port_phylink_create(struct dsa_port *dp)
}
}
- pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
- mode, &dsa_port_phylink_mac_ops);
+ mac_ops = &dsa_port_phylink_mac_ops;
+ if (ds->phylink_mac_ops)
+ mac_ops = ds->phylink_mac_ops;
+
+ pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), mode,
+ mac_ops);
if (IS_ERR(pl)) {
pr_err("error creating PHYLINK: %ld\n", PTR_ERR(pl));
return PTR_ERR(pl);
@@ -1703,78 +1679,6 @@ void dsa_port_phylink_destroy(struct dsa_port *dp)
dp->pl = NULL;
}
-static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
-{
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- int err = 0;
-
- phydev = dsa_port_get_phy_device(dp);
- if (!phydev)
- return 0;
-
- if (IS_ERR(phydev))
- return PTR_ERR(phydev);
-
- if (enable) {
- err = genphy_resume(phydev);
- if (err < 0)
- goto err_put_dev;
-
- err = genphy_read_status(phydev);
- if (err < 0)
- goto err_put_dev;
- } else {
- err = genphy_suspend(phydev);
- if (err < 0)
- goto err_put_dev;
- }
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
-
-err_put_dev:
- put_device(&phydev->mdio.dev);
- return err;
-}
-
-static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
-{
- struct device_node *dn = dp->dn;
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- phy_interface_t mode;
- int err;
-
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
-
- phydev = of_phy_find_device(dn);
-
- err = of_get_phy_mode(dn, &mode);
- if (err)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_read_status(phydev);
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- put_device(&phydev->mdio.dev);
-
- return 0;
-}
-
static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1952,12 +1856,23 @@ static void dsa_shared_port_validate_of(struct dsa_port *dp,
dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
}
+static void dsa_shared_port_link_down(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down)
+ ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED,
+ PHY_INTERFACE_MODE_NA);
+ else if (ds->ops->phylink_mac_link_down)
+ ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED,
+ PHY_INTERFACE_MODE_NA);
+}
+
int dsa_shared_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
bool missing_link_description;
bool missing_phy_mode;
- int port = dp->index;
dsa_shared_port_validate_of(dp, &missing_phy_mode,
&missing_link_description);
@@ -1967,46 +1882,28 @@ int dsa_shared_port_link_register_of(struct dsa_port *dp)
dsa_switches_apply_workarounds))
return -EINVAL;
- if (!ds->ops->adjust_link) {
- if (missing_link_description) {
- dev_warn(ds->dev,
- "Skipping phylink registration for %s port %d\n",
- dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
- } else {
- if (ds->ops->phylink_mac_link_down)
- ds->ops->phylink_mac_link_down(ds, port,
- MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
+ dsa_shared_port_link_down(dp);
- return dsa_shared_port_phylink_register(dp);
- }
- return 0;
+ return dsa_shared_port_phylink_register(dp);
}
- dev_warn(ds->dev,
- "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
-
- if (of_phy_is_fixed_link(dp->dn))
- return dsa_shared_port_fixed_link_register_of(dp);
- else
- return dsa_shared_port_setup_phy_of(dp, true);
+ return 0;
}
void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->adjust_link && dp->pl) {
+ if (dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
dsa_port_phylink_destroy(dp);
return;
}
-
- if (of_phy_is_fixed_link(dp->dn))
- of_phy_deregister_fixed_link(dp->dn);
- else
- dsa_shared_port_setup_phy_of(dp, false);
}
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 2717e9d7b6..1aba1d05c2 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -75,7 +75,7 @@ sja1105_tagger_private(struct dsa_switch *ds)
}
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
-static inline bool sja1105_is_link_local(const struct sk_buff *skb)
+static bool sja1105_is_link_local(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 dmac = ether_addr_to_u64(hdr->h_dest);
@@ -121,7 +121,7 @@ static void sja1105_meta_unpack(const struct sk_buff *skb,
packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0);
}
-static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
+static bool sja1105_is_meta_frame(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 smac = ether_addr_to_u64(hdr->h_source);
diff --git a/net/dsa/trace.h b/net/dsa/trace.h
index 567f29a397..83f3e5f784 100644
--- a/net/dsa/trace.h
+++ b/net/dsa/trace.h
@@ -39,8 +39,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -98,8 +98,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -157,8 +157,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -199,7 +199,7 @@ TRACE_EVENT(dsa_lag_fdb_add_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -227,7 +227,7 @@ TRACE_EVENT(dsa_lag_fdb_add_bump,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -255,7 +255,7 @@ TRACE_EVENT(dsa_lag_fdb_del_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -283,7 +283,7 @@ TRACE_EVENT(dsa_lag_fdb_del_drop,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -310,7 +310,7 @@ TRACE_EVENT(dsa_lag_fdb_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -338,8 +338,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -383,8 +383,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -426,8 +426,8 @@ TRACE_EVENT(dsa_vlan_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
),
diff --git a/net/dsa/user.c b/net/dsa/user.c
index b15e71cc34..867c5fe9a4 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -210,7 +210,7 @@ static int dsa_user_sync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_uc(struct net_device *dev,
@@ -230,7 +230,7 @@ static int dsa_user_unsync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_sync_mc(struct net_device *dev,
@@ -250,7 +250,7 @@ static int dsa_user_sync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_mc(struct net_device *dev,
@@ -270,7 +270,7 @@ static int dsa_user_unsync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
void dsa_user_sync_ha(struct net_device *dev)
@@ -352,7 +352,7 @@ void dsa_user_mii_bus_init(struct dsa_switch *ds)
/* user device handling ****************************************************/
static int dsa_user_get_iflink(const struct net_device *dev)
{
- return dsa_user_to_conduit(dev)->ifindex;
+ return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}
static int dsa_user_open(struct net_device *dev)
@@ -875,8 +875,8 @@ static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx,
return err;
}
-static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
- struct sk_buff *skb)
+static netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
struct dsa_user_priv *p = netdev_priv(dev);
@@ -1222,7 +1222,7 @@ static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
return ret;
}
-static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
@@ -1242,7 +1242,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
return phylink_ethtool_set_eee(dp->pl, e);
}
-static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
@@ -2120,7 +2120,7 @@ int dsa_user_change_mtu(struct net_device *dev, int new_mtu)
if (err)
goto out_port_failed;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
dsa_bridge_mtu_normalization(dp);
@@ -2137,6 +2137,32 @@ out_conduit_failed:
}
static int __maybe_unused
+dsa_user_dcbnl_set_apptrust(struct net_device *dev, u8 *sel, int nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_set_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_set_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
+dsa_user_dcbnl_get_apptrust(struct net_device *dev, u8 *sel, int *nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_get_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_get_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
{
struct dsa_port *dp = dsa_user_to_port(dev);
@@ -2163,6 +2189,58 @@ dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
return 0;
}
+/* Update the DSCP prio entries on all user ports of the switch in case
+ * the switch supports global DSCP prio instead of per port DSCP prios.
+ */
+static int dsa_user_dcbnl_ieee_global_dscp_setdel(struct net_device *dev,
+ struct dcb_app *app, bool del)
+{
+ int (*setdel)(struct net_device *dev, struct dcb_app *app);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ int err, restore_err;
+
+ if (del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ err = setdel(user, app);
+ if (err)
+ goto err_try_to_restore;
+ }
+
+ return 0;
+
+err_try_to_restore:
+
+ /* Revert logic to restore previous state of app entries */
+ if (!del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port_continue_reverse(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ restore_err = setdel(user, app);
+ if (restore_err)
+ netdev_err(user, "Failed to restore DSCP prio entry configuration\n");
+ }
+
+ return err;
+}
+
static int __maybe_unused
dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
{
@@ -2194,6 +2272,17 @@ dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, false);
+ if (err) {
+ if (ds->ops->port_del_dscp_prio)
+ ds->ops->port_del_dscp_prio(ds, port, dscp, new_prio);
+ dcb_ieee_delapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2264,6 +2353,18 @@ dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, true);
+ if (err) {
+ if (ds->ops->port_add_dscp_prio)
+ ds->ops->port_add_dscp_prio(ds, port, dscp,
+ app->priority);
+ dcb_ieee_setapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2376,6 +2477,8 @@ static const struct ethtool_ops dsa_user_ethtool_ops = {
static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = {
.ieee_setapp = dsa_user_dcbnl_ieee_setapp,
.ieee_delapp = dsa_user_dcbnl_ieee_delapp,
+ .dcbnl_setapptrust = dsa_user_dcbnl_set_apptrust,
+ .dcbnl_getapptrust = dsa_user_dcbnl_get_apptrust,
};
static void dsa_user_get_stats64(struct net_device *dev,
@@ -2429,7 +2532,7 @@ static const struct net_device_ops dsa_user_netdev_ops = {
.ndo_fill_forward_path = dsa_user_fill_forward_path,
};
-static struct device_type dsa_type = {
+static const struct device_type dsa_type = {
.name = "dsa",
};
@@ -2445,7 +2548,7 @@ EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change);
static void dsa_user_phylink_fixed_state(struct phylink_config *config,
struct phylink_link_state *state)
{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
/* No need to check that this operation is valid, the callback would
@@ -2625,11 +2728,7 @@ int dsa_user_create(struct dsa_port *port)
user_dev->vlan_features = conduit->vlan_features;
p = netdev_priv(user_dev);
- user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!user_dev->tstats) {
- free_netdev(user_dev);
- return -ENOMEM;
- }
+ user_dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
ret = gro_cells_init(&p->gcells, user_dev);
if (ret)
@@ -2695,7 +2794,6 @@ out_phy:
out_gcells:
gro_cells_destroy(&p->gcells);
out_free:
- free_percpu(user_dev->tstats);
free_netdev(user_dev);
port->user = NULL;
return ret;
@@ -2716,7 +2814,6 @@ void dsa_user_destroy(struct net_device *user_dev)
dsa_port_phylink_destroy(dp);
gro_cells_destroy(&p->gcells);
- free_percpu(user_dev->tstats);
free_netdev(user_dev);
}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 049c3adeb8..4e3651101b 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
- eth = (struct ethhdr *)skb->data;
- skb_pull_inline(skb, ETH_HLEN);
-
+ eth = eth_skb_pull_mac(skb);
eth_skb_pkt_type(skb, dev);
/*
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 2853394d06..bf398973eb 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -4,16 +4,13 @@
#include "common.h"
#include "bitset.h"
-#define EEE_MODES_COUNT \
- (sizeof_field(struct ethtool_eee, supported) * BITS_PER_BYTE)
-
struct eee_req_info {
struct ethnl_req_info base;
};
struct eee_reply_data {
struct ethnl_reply_data base;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
#define EEE_REPDATA(__reply_base) \
@@ -30,6 +27,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct ethtool_keee *eee = &data->eee;
int ret;
if (!dev->ethtool_ops->get_eee)
@@ -37,7 +35,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = dev->ethtool_ops->get_eee(dev, &data->eee);
+ ret = dev->ethtool_ops->get_eee(dev, eee);
ethnl_ops_complete(dev);
return ret;
@@ -48,24 +46,21 @@ static int eee_reply_size(const struct ethnl_req_info *req_base,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int len = 0;
int ret;
- BUILD_BUG_ON(sizeof(eee->advertised) * BITS_PER_BYTE !=
- EEE_MODES_COUNT);
- BUILD_BUG_ON(sizeof(eee->lp_advertised) * BITS_PER_BYTE !=
- EEE_MODES_COUNT);
-
/* MODES_OURS */
- ret = ethnl_bitset32_size(&eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
/* MODES_PEERS */
- ret = ethnl_bitset32_size(&eee->lp_advertised, NULL,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
@@ -84,24 +79,26 @@ static int eee_fill_reply(struct sk_buff *skb,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS,
- &eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_OURS,
+ eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_PEER,
- &eee->lp_advertised, NULL, EEE_MODES_COUNT,
- link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_PEER,
+ eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, !!eee->eee_active) ||
- nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, !!eee->eee_enabled) ||
+ if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, eee->eee_active) ||
+ nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, eee->eee_enabled) ||
nla_put_u8(skb, ETHTOOL_A_EEE_TX_LPI_ENABLED,
- !!eee->tx_lpi_enabled) ||
+ eee->tx_lpi_enabled) ||
nla_put_u32(skb, ETHTOOL_A_EEE_TX_LPI_TIMER, eee->tx_lpi_timer))
return -EMSGSIZE;
@@ -132,7 +129,7 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct ethtool_eee eee = {};
+ struct ethtool_keee eee = {};
bool mod = false;
int ret;
@@ -140,14 +137,15 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
if (ret < 0)
return ret;
- ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT,
- tb[ETHTOOL_A_EEE_MODES_OURS],
- link_mode_names, info->extack, &mod);
+ ret = ethnl_update_bitset(eee.advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ tb[ETHTOOL_A_EEE_MODES_OURS],
+ link_mode_names, info->extack, &mod);
if (ret < 0)
return ret;
- ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
- ethnl_update_bool32(&eee.tx_lpi_enabled,
- tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod);
+ ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
+ ethnl_update_bool(&eee.tx_lpi_enabled, tb[ETHTOOL_A_EEE_TX_LPI_ENABLED],
+ &mod);
ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER],
&mod);
if (!mod)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 7519b0818b..fcc3dbef8b 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -26,12 +26,12 @@
#include <linux/sched/signal.h>
#include <linux/net.h>
#include <linux/pm_runtime.h>
+#include <linux/utsname.h>
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
#include <linux/ethtool_netlink.h>
-#include <generated/utsrelease.h>
#include "common.h"
/* State held across locks and calls for commands which have devlink fallback */
@@ -713,7 +713,8 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp)
struct device *parent = dev->dev.parent;
rsp->info.cmd = ETHTOOL_GDRVINFO;
- strscpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
+ strscpy(rsp->info.version, init_uts_ns.name.release,
+ sizeof(rsp->info.version));
if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &rsp->info);
if (!rsp->info.bus_info[0] && parent)
@@ -1276,11 +1277,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
const struct ethtool_ops *ops = dev->ethtool_ops;
u32 dev_indir_size = 0, dev_key_size = 0, i;
+ u32 user_indir_len = 0, indir_bytes = 0;
struct ethtool_rxfh_param rxfh_dev = {};
struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
- u32 indir_bytes = 0;
u8 *rss_config;
int ret;
@@ -1305,7 +1306,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
return -EINVAL;
- if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE &&
+ (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
!ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
@@ -1340,6 +1342,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
*/
if (rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
+ user_indir_len = indir_bytes;
rxfh_dev.indir = (u32 *)rss_config;
rxfh_dev.indir_size = dev_indir_size;
ret = ethtool_copy_validate_indir(rxfh_dev.indir,
@@ -1366,7 +1369,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
rxfh_dev.key_size = dev_key_size;
rxfh_dev.key = rss_config + indir_bytes;
if (copy_from_user(rxfh_dev.key,
- useraddr + rss_cfg_offset + indir_bytes,
+ useraddr + rss_cfg_offset + user_indir_len,
rxfh.key_size)) {
ret = -EFAULT;
goto out;
@@ -1508,22 +1511,57 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return 0;
}
+static void eee_to_keee(struct ethtool_keee *keee,
+ const struct ethtool_eee *eee)
+{
+ memset(keee, 0, sizeof(*keee));
+
+ keee->eee_enabled = eee->eee_enabled;
+ keee->tx_lpi_enabled = eee->tx_lpi_enabled;
+ keee->tx_lpi_timer = eee->tx_lpi_timer;
+
+ ethtool_convert_legacy_u32_to_link_mode(keee->advertised,
+ eee->advertised);
+}
+
+static void keee_to_eee(struct ethtool_eee *eee,
+ const struct ethtool_keee *keee)
+{
+ bool overflow;
+
+ memset(eee, 0, sizeof(*eee));
+
+ eee->eee_active = keee->eee_active;
+ eee->eee_enabled = keee->eee_enabled;
+ eee->tx_lpi_enabled = keee->tx_lpi_enabled;
+ eee->tx_lpi_timer = keee->tx_lpi_timer;
+
+ overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported,
+ keee->supported);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->advertised,
+ keee->advertised);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised,
+ keee->lp_advertised);
+ if (overflow)
+ pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n");
+}
+
static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int rc;
if (!dev->ethtool_ops->get_eee)
return -EOPNOTSUPP;
- memset(&edata, 0, sizeof(struct ethtool_eee));
- edata.cmd = ETHTOOL_GEEE;
- rc = dev->ethtool_ops->get_eee(dev, &edata);
-
+ memset(&keee, 0, sizeof(keee));
+ rc = dev->ethtool_ops->get_eee(dev, &keee);
if (rc)
return rc;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ keee_to_eee(&eee, &keee);
+ if (copy_to_user(useraddr, &eee, sizeof(eee)))
return -EFAULT;
return 0;
@@ -1531,16 +1569,18 @@ static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int ret;
if (!dev->ethtool_ops->set_eee)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ if (copy_from_user(&eee, useraddr, sizeof(eee)))
return -EFAULT;
- ret = dev->ethtool_ops->set_eee(dev, &edata);
+ eee_to_keee(&keee, &eee);
+ ret = dev->ethtool_ops->set_eee(dev, &keee);
if (!ret)
ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
return ret;
@@ -2182,7 +2222,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
int n_stats, ret;
- if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
+ if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats)
return -EOPNOTSUPP;
n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index b2de2108b3..34d76e8784 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi(phydev);
mutex_unlock(&phydev->lock);
@@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi_max)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi_max(phydev);
mutex_unlock(&phydev->lock);
@@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
return ret;
};
+static bool linkstate_sqi_critical_error(int sqi)
+{
+ return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
+}
+
+static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
+{
+ return data->sqi >= 0 && data->sqi_max >= 0 &&
+ data->sqi <= data->sqi_max;
+}
+
static int linkstate_get_link_ext_state(struct net_device *dev,
struct linkstate_reply_data *data)
{
@@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
data->link = __ethtool_get_link(dev);
ret = linkstate_get_sqi(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
- if (data->sqi != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
-
- if (data->sqi_max != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
+ if (linkstate_sqi_valid(data)) {
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
+ }
if (data->link_ext_state_provided)
len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
@@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
- if (data->sqi != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
- return -EMSGSIZE;
+ if (linkstate_sqi_valid(data)) {
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
- if (data->sqi_max != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
- return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
+ data->sqi_max))
+ return -EMSGSIZE;
+ }
if (data->link_ext_state_provided) {
if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index fe3553f60b..bd04f28d5c 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -477,11 +477,7 @@ out:
return ret;
}
-/* Default ->dumpit() handler for GET requests. Device iteration copied from
- * rtnl_dump_ifinfo(); we have to be more careful about device hashtable
- * persistence as we cannot guarantee to hold RTNL lock through the whole
- * function as rtnetnlink does.
- */
+/* Default ->dumpit() handler for GET requests. */
static int ethnl_default_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -490,14 +486,14 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
struct net_device *dev;
int ret = 0;
- rtnl_lock();
+ rcu_read_lock();
for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
dev_hold(dev);
- rtnl_unlock();
+ rcu_read_unlock();
ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
- rtnl_lock();
+ rcu_read_lock();
dev_put(dev);
if (ret < 0 && ret != -EOPNOTSUPP) {
@@ -507,7 +503,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
}
ret = 0;
}
- rtnl_unlock();
+ rcu_read_unlock();
return ret;
}
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index cc478af771..776ac96cda 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -82,6 +82,10 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */
if (st->podl_pw_status > 0)
len += nla_total_size(sizeof(u32)); /* _PODL_PSE_PW_D_STATUS */
+ if (st->c33_admin_state > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */
+ if (st->c33_pw_status > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */
return len;
}
@@ -103,6 +107,16 @@ static int pse_fill_reply(struct sk_buff *skb,
st->podl_pw_status))
return -EMSGSIZE;
+ if (st->c33_admin_state > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_ADMIN_STATE,
+ st->c33_admin_state))
+ return -EMSGSIZE;
+
+ if (st->c33_pw_status > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_D_STATUS,
+ st->c33_pw_status))
+ return -EMSGSIZE;
+
return 0;
}
@@ -113,25 +127,18 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
+ [ETHTOOL_A_C33_PSE_ADMIN_CONTROL] =
+ NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED),
};
static int
ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL];
-}
-
-static int
-ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
-{
struct net_device *dev = req_info->dev;
- struct pse_control_config config = {};
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
- /* this values are already validated by the ethnl_pse_set_policy */
- config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
-
phydev = dev->phydev;
if (!phydev) {
NL_SET_ERR_MSG(info->extack, "No PHY is attached");
@@ -143,7 +150,42 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
}
- /* Return errno directly - PSE has no notification */
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
+ !pse_has_podl(phydev->psec)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
+ "setting PoDL PSE admin control not supported");
+ return -EOPNOTSUPP;
+ }
+ if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
+ !pse_has_c33(phydev->psec)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
+ "setting C33 PSE admin control not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 1;
+}
+
+static int
+ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ struct pse_control_config config = {};
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
+
+ phydev = dev->phydev;
+ /* These values are already validated by the ethnl_pse_set_policy */
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL])
+ config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+ if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL])
+ config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]);
+
+ /* Return errno directly - PSE has no notification
+ * pse_ethtool_set_config() will do nothing if the config is null
+ */
return pse_ethtool_set_config(phydev->psec, info->extack, &config);
}
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 71679137ef..5c4c4505ab 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base,
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
int len;
- len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */
+ nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */
nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
nla_total_size(data->hkey_size); /* _RSS_HKEY */
@@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+
+ if (request->rss_context &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
+ return -EMSGSIZE;
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 9daed0aab1..57d496287e 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -13,14 +13,18 @@ struct tsinfo_req_info {
struct tsinfo_reply_data {
struct ethnl_reply_data base;
struct ethtool_ts_info ts_info;
+ struct ethtool_ts_stats stats;
};
#define TSINFO_REPDATA(__reply_base) \
container_of(__reply_base, struct tsinfo_reply_data, base)
+#define ETHTOOL_TS_STAT_CNT \
+ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1))
+
const struct nla_policy ethnl_tsinfo_get_policy[] = {
[ETHTOOL_A_TSINFO_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_stats),
};
static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
@@ -34,6 +38,12 @@ static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
+ if (req_base->flags & ETHTOOL_FLAG_STATS) {
+ ethtool_stats_init((u64 *)&data->stats,
+ sizeof(data->stats) / sizeof(u64));
+ if (dev->ethtool_ops->get_ts_stats)
+ dev->ethtool_ops->get_ts_stats(dev, &data->stats);
+ }
ret = __ethtool_get_ts_info(dev, &data->ts_info);
ethnl_ops_complete(dev);
@@ -79,10 +89,47 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
}
if (ts_info->phc_index >= 0)
len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */
+ if (req_base->flags & ETHTOOL_FLAG_STATS)
+ len += nla_total_size(0) + /* _TSINFO_STATS */
+ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT;
return len;
}
+static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+ if (val == ETHTOOL_STAT_NOT_SET)
+ return 0;
+ if (nla_put_uint(skb, attrtype, val))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int tsinfo_put_stats(struct sk_buff *skb,
+ const struct ethtool_ts_stats *stats)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (tsinfo_put_stat(skb, stats->tx_stats.pkts,
+ ETHTOOL_A_TS_STAT_TX_PKTS) ||
+ tsinfo_put_stat(skb, stats->tx_stats.lost,
+ ETHTOOL_A_TS_STAT_TX_LOST) ||
+ tsinfo_put_stat(skb, stats->tx_stats.err,
+ ETHTOOL_A_TS_STAT_TX_ERR))
+ goto err_cancel;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int tsinfo_fill_reply(struct sk_buff *skb,
const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
@@ -119,6 +166,9 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
if (ts_info->phc_index >= 0 &&
nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index))
return -EMSGSIZE;
+ if (req_base->flags & ETHTOOL_FLAG_STATS &&
+ tsinfo_put_stats(skb, &data->stats))
+ return -EMSGSIZE;
return 0;
}
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index d697f68c59..d6f5283982 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -213,7 +213,6 @@ static int tls_handshake_accept(struct handshake_req *req,
if (!hdr)
goto out_cancel;
- ret = -EMSGSIZE;
ret = nla_put_s32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
if (ret < 0)
goto out_cancel;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 9d71b66183..e6904288d4 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -28,29 +28,19 @@ static bool is_slave_up(struct net_device *dev)
return dev && is_admin_up(dev) && netif_oper_up(dev);
}
-static void __hsr_set_operstate(struct net_device *dev, int transition)
-{
- write_lock(&dev_base_lock);
- if (dev->operstate != transition) {
- dev->operstate = transition;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- } else {
- write_unlock(&dev_base_lock);
- }
-}
-
static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
{
- if (!is_admin_up(master->dev)) {
- __hsr_set_operstate(master->dev, IF_OPER_DOWN);
+ struct net_device *dev = master->dev;
+
+ if (!is_admin_up(dev)) {
+ netdev_set_operstate(dev, IF_OPER_DOWN);
return;
}
if (has_carrier)
- __hsr_set_operstate(master->dev, IF_OPER_UP);
+ netdev_set_operstate(dev, IF_OPER_UP);
else
- __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN);
+ netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}
static bool hsr_check_carrier(struct hsr_port *master)
@@ -71,39 +61,36 @@ static bool hsr_check_carrier(struct hsr_port *master)
return false;
}
-static void hsr_check_announce(struct net_device *hsr_dev,
- unsigned char old_operstate)
+static void hsr_check_announce(struct net_device *hsr_dev)
{
struct hsr_priv *hsr;
hsr = netdev_priv(hsr_dev);
-
- if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) {
- /* Went up */
- hsr->announce_count = 0;
- mod_timer(&hsr->announce_timer,
- jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) {
+ /* Enable announce timer and start sending supervisory frames */
+ if (!timer_pending(&hsr->announce_timer)) {
+ hsr->announce_count = 0;
+ mod_timer(&hsr->announce_timer, jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ }
+ } else {
+ /* Deactivate the announce timer */
+ timer_delete(&hsr->announce_timer);
}
-
- if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP)
- /* Went down */
- del_timer(&hsr->announce_timer);
}
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
{
struct hsr_port *master;
- unsigned char old_operstate;
bool has_carrier;
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
/* netif_stacked_transfer_operstate() cannot be used here since
* it doesn't set IF_OPER_LOWERLAYERDOWN (?)
*/
- old_operstate = master->dev->operstate;
has_carrier = hsr_check_carrier(master);
hsr_set_operstate(master, has_carrier);
- hsr_check_announce(master->dev, old_operstate);
+ hsr_check_announce(master->dev);
}
int hsr_get_max_mtu(struct hsr_priv *hsr)
@@ -133,7 +120,7 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -142,30 +129,32 @@ static int hsr_dev_open(struct net_device *dev)
{
struct hsr_priv *hsr;
struct hsr_port *port;
- char designation;
+ const char *designation = NULL;
hsr = netdev_priv(dev);
- designation = '\0';
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
case HSR_PT_SLAVE_A:
- designation = 'A';
+ designation = "Slave A";
break;
case HSR_PT_SLAVE_B:
- designation = 'B';
+ designation = "Slave B";
+ break;
+ case HSR_PT_INTERLINK:
+ designation = "Interlink";
break;
default:
- designation = '?';
+ designation = "Unknown";
}
if (!is_slave_up(port->dev))
- netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+ netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
designation, port->dev->name);
}
- if (designation == '\0')
+ if (!designation)
netdev_warn(dev, "No slave devices configured\n");
return 0;
@@ -296,6 +285,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
struct hsr_priv *hsr = master->hsr;
__u8 type = HSR_TLV_LIFE_CHECK;
struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tlv *hsr_stlv;
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
@@ -335,6 +325,16 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+ if (hsr->redbox) {
+ hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv));
+ hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC;
+ hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+
+ /* Payload: MacAddressRedBox */
+ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+ ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
+ }
+
if (skb_put_padto(skb, ETH_ZLEN)) {
spin_unlock_bh(&hsr->seqnr_lock);
return;
@@ -416,6 +416,10 @@ void hsr_del_ports(struct hsr_priv *hsr)
if (port)
hsr_del_port(port);
+ port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (port)
+ hsr_del_port(port);
+
port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
if (port)
hsr_del_port(port);
@@ -477,7 +481,7 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_set_rx_mode = hsr_set_rx_mode,
};
-static struct device_type hsr_type = {
+static const struct device_type hsr_type = {
.name = "hsr",
};
@@ -545,8 +549,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
};
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack)
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack)
{
bool unregister = false;
struct hsr_priv *hsr;
@@ -555,6 +559,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
INIT_LIST_HEAD(&hsr->node_db);
+ INIT_LIST_HEAD(&hsr->proxy_node_db);
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
@@ -580,9 +585,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
/* Overflow soon to find bugs easier: */
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
+ hsr->interlink_sequence_nr = HSR_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
+ timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0);
ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
@@ -615,6 +622,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_unregister;
+ if (interlink) {
+ res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack);
+ if (res)
+ goto err_unregister;
+
+ hsr->redbox = true;
+ ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr);
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+ }
+
hsr_debugfs_init(hsr, hsr_dev);
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 9060c92168..655284095b 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -16,8 +16,8 @@
void hsr_del_ports(struct hsr_priv *hsr);
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack);
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack);
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
int hsr_get_max_mtu(struct hsr_priv *hsr);
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 5d68cb1816..05a61b8286 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -377,6 +377,15 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
*/
ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr);
}
+
+ /* When HSR node is used as RedBox - the frame received from HSR ring
+ * requires source MAC address (SA) replacement to one which can be
+ * recognized by SAN devices (otherwise, frames are dropped by switch)
+ */
+ if (port->type == HSR_PT_INTERLINK)
+ ether_addr_copy(eth_hdr(skb)->h_source,
+ port->hsr->macaddress_redbox);
+
return dev_queue_xmit(skb);
}
@@ -390,9 +399,57 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
+ struct sk_buff *skb;
+
if (port->dev->features & NETIF_F_HW_HSR_FWD)
return prp_drop_frame(frame, port);
+ /* RedBox specific frames dropping policies
+ *
+ * Do not send HSR supervisory frames to SAN devices
+ */
+ if (frame->is_supervision && port->type == HSR_PT_INTERLINK)
+ return true;
+
+ /* Do not forward to other HSR port (A or B) unicast frames which
+ * are addressed to interlink port (and are in the ProxyNodeTable).
+ */
+ skb = frame->skb_hsr;
+ if (skb && prp_drop_frame(frame, port) &&
+ is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+
+ /* Do not forward to port C (Interlink) frames from nodes A and B
+ * if DA is in NodeTable.
+ */
+ if ((frame->port_rcv->type == HSR_PT_SLAVE_A ||
+ frame->port_rcv->type == HSR_PT_SLAVE_B) &&
+ port->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_hsr;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
+ /* Do not forward to port A and B unicast frames received on the
+ * interlink port if it is addressed to one of nodes registered in
+ * the ProxyNodeTable.
+ */
+ if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) &&
+ frame->port_rcv->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_std;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
return false;
}
@@ -448,13 +505,14 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
}
/* Check if frame is to be dropped. Eg. for PRP no forward
- * between ports.
+ * between ports, or sending HSR supervision to RedBox.
*/
if (hsr->proto_ops->drop_frame &&
hsr->proto_ops->drop_frame(frame, port))
continue;
- if (port->type != HSR_PT_MASTER)
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
skb = hsr->proto_ops->create_tagged_frame(frame, port);
else
skb = hsr->proto_ops->get_untagged_frame(frame, port);
@@ -469,7 +527,9 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
hsr_deliver_master(skb, port->dev, frame->node_src);
} else {
if (!hsr_xmit(skb, port, frame))
- sent = true;
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
+ sent = true;
}
}
}
@@ -503,10 +563,12 @@ static void handle_std_frame(struct sk_buff *skb,
frame->skb_prp = NULL;
frame->skb_std = skb;
- if (port->type != HSR_PT_MASTER) {
+ if (port->type != HSR_PT_MASTER)
frame->is_from_san = true;
- } else {
- /* Sequence nr for the master node */
+
+ if (port->type == HSR_PT_MASTER ||
+ port->type == HSR_PT_INTERLINK) {
+ /* Sequence nr for the master/interlink node */
lockdep_assert_held(&hsr->seqnr_lock);
frame->sequence_nr = hsr->sequence_nr;
hsr->sequence_nr++;
@@ -564,6 +626,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
{
struct hsr_priv *hsr = port->hsr;
struct hsr_vlan_ethhdr *vlan_hdr;
+ struct list_head *n_db;
struct ethhdr *ethhdr;
__be16 proto;
int ret;
@@ -574,9 +637,13 @@ static int fill_frame_info(struct hsr_frame_info *frame,
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
- frame->is_supervision,
- port->type);
+
+ n_db = &hsr->node_db;
+ if (port->type == HSR_PT_INTERLINK)
+ n_db = &hsr->proxy_node_db;
+
+ frame->node_src = hsr_get_node(port, n_db, skb,
+ frame->is_supervision, port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 26329db092..614df96497 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -71,6 +71,14 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
return NULL;
}
+/* Check if node for a given MAC address is already present in data base
+ */
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
+{
+ return !!find_node_by_addr_A(node_db, addr);
+}
+
/* Helper for device init; the self_node is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
@@ -223,6 +231,15 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
}
}
+ /* Check if required node is not in proxy nodes table */
+ list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
+ if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
+ if (hsr->proto_ops->update_san_info)
+ hsr->proto_ops->update_san_info(node, is_sup);
+ return node;
+ }
+ }
+
/* Everyone may create a node entry, connected node to a HSR/PRP
* device.
*/
@@ -418,6 +435,10 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
node_dst = find_node_by_addr_A(&port->hsr->node_db,
eth_hdr(skb)->h_dest);
+ if (!node_dst && port->hsr->redbox)
+ node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest);
+
if (!node_dst) {
if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
netdev_err(skb->dev, "%s: Unknown node\n", __func__);
@@ -561,6 +582,37 @@ void hsr_prune_nodes(struct timer_list *t)
jiffies + msecs_to_jiffies(PRUNE_PERIOD));
}
+void hsr_prune_proxy_nodes(struct timer_list *t)
+{
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_proxy_timer);
+ unsigned long timestamp;
+ struct hsr_node *node;
+ struct hsr_node *tmp;
+
+ spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) {
+ timestamp = node->time_in[HSR_PT_INTERLINK];
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ if (!node->removed) {
+ list_del_rcu(&node->mac_list);
+ node->removed = true;
+ /* Note that we need to free this entry later: */
+ kfree_rcu(node, rcu_head);
+ }
+ }
+ }
+
+ spin_unlock_bh(&hsr->list_lock);
+
+ /* Restart timer */
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+}
+
void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index b23556251d..7619e31c1d 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -46,6 +46,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr);
void hsr_prune_nodes(struct timer_list *t);
+void hsr_prune_proxy_nodes(struct timer_list *t);
int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
@@ -67,6 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
struct hsr_node *node);
void prp_update_san_info(struct hsr_node *node, bool is_sup);
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN]);
+
struct hsr_node {
struct list_head mac_list;
/* Protect R/W access to seq_out */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 9756e657ba..d7ae32473c 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -96,7 +96,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
break; /* Handled in ndo_change_mtu() */
mtu_max = hsr_get_max_mtu(port->hsr);
master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
- master->dev->mtu = mtu_max;
+ WRITE_ONCE(master->dev->mtu, mtu_max);
break;
case NETDEV_UNREGISTER:
if (!is_hsr_master(dev)) {
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 18e01791ad..23850b16d1 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -21,6 +21,7 @@
*/
#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
#define HSR_NODE_FORGET_TIME 60000 /* ms */
+#define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */
#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
#define HSR_ENTRY_FORGET_TIME 400 /* ms */
@@ -35,6 +36,7 @@
* HSR_NODE_FORGET_TIME?
*/
#define PRUNE_PERIOD 3000 /* ms */
+#define PRUNE_PROXY_PERIOD 3000 /* ms */
#define HSR_TLV_EOT 0 /* End of TLVs */
#define HSR_TLV_ANNOUNCE 22
#define HSR_TLV_LIFE_CHECK 23
@@ -192,11 +194,14 @@ struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
struct list_head node_db; /* Known HSR nodes */
+ struct list_head proxy_node_db; /* RedBox HSR proxy nodes */
struct hsr_self_node __rcu *self_node; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
+ struct timer_list prune_proxy_timer;
int announce_count;
u16 sequence_nr;
+ u16 interlink_sequence_nr; /* Interlink port seq_nr */
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
@@ -209,6 +214,8 @@ struct hsr_priv {
* of lan_id
*/
bool fwd_offloaded; /* Forwarding offloaded to HW */
+ bool redbox; /* Device supports HSR RedBox */
+ unsigned char macaddress_redbox[ETH_ALEN];
unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16));
/* Align to u16 boundary to avoid unaligned access
* in ether_addr_equal
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 78fe40eb9f..898f18c6da 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
[IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN },
[IFLA_HSR_SEQ_NR] = { .type = NLA_U16 },
[IFLA_HSR_PROTOCOL] = { .type = NLA_U8 },
+ [IFLA_HSR_INTERLINK] = { .type = NLA_U32 },
};
/* Here, it seems a netdevice has already been allocated for us, and the
@@ -35,8 +36,8 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
enum hsr_version proto_version;
unsigned char multicast_spec;
u8 proto = HSR_PROTOCOL_HSR;
- struct net_device *link[2];
+ struct net_device *link[2], *interlink = NULL;
if (!data) {
NL_SET_ERR_MSG_MOD(extack, "No slave devices specified");
return -EINVAL;
@@ -67,6 +68,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
+ if (data[IFLA_HSR_INTERLINK])
+ interlink = __dev_get_by_index(src_net,
+ nla_get_u32(data[IFLA_HSR_INTERLINK]));
+
+ if (interlink && interlink == link[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same");
+ return -EINVAL;
+ }
+
+ if (interlink && interlink == link[1]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave2 are the same");
+ return -EINVAL;
+ }
+
if (!data[IFLA_HSR_MULTICAST_SPEC])
multicast_spec = 0;
else
@@ -96,10 +111,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
}
}
- if (proto == HSR_PROTOCOL_PRP)
+ if (proto == HSR_PROTOCOL_PRP) {
proto_version = PRP_V1;
+ if (interlink) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interlink only works with HSR");
+ return -EINVAL;
+ }
+ }
- return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack);
+ return hsr_dev_finalize(dev, link, interlink, multicast_spec,
+ proto_version, extack);
}
static void hsr_dellink(struct net_device *dev, struct list_head *head)
@@ -107,6 +129,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
struct hsr_priv *hsr = netdev_priv(dev);
del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->prune_proxy_timer);
del_timer_sync(&hsr->announce_timer);
hsr_debugfs_term(hsr);
@@ -114,6 +137,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_self_node(hsr);
hsr_del_nodes(&hsr->node_db);
+ hsr_del_nodes(&hsr->proxy_node_db);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 1b6457f357..af6cf64a00 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -55,6 +55,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
protocol = eth_hdr(skb)->h_proto;
if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+ port->type != HSR_PT_INTERLINK &&
hsr->proto_ops->invalid_dan_ingress_frame &&
hsr->proto_ops->invalid_dan_ingress_frame(protocol))
goto finish_pass;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2c087b7f17..77b4e92027 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -93,7 +93,7 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
static int lowpan_get_iflink(const struct net_device *dev)
{
- return lowpan_802154_dev(dev)->wdev->ifindex;
+ return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex);
}
static const struct net_device_ops lowpan_netdev_ops = {
@@ -280,5 +280,6 @@ static void __exit lowpan_cleanup_module(void)
module_init(lowpan_init_module);
module_exit(lowpan_cleanup_module);
+MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("lowpan");
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6dd960ec55..56ef873828 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -338,7 +338,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -351,7 +350,6 @@ static struct ctl_table lowpan_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
@@ -370,10 +368,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
}
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
@@ -399,7 +395,7 @@ err_alloc:
static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 00302e8b96..990a83455d 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1137,4 +1137,5 @@ module_init(af_ieee802154_init);
module_exit(af_ieee802154_remove);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IEEE 802.15.4 socket interface");
MODULE_ALIAS_NETPROTO(PF_IEEE802154);
diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c
index d290393380..6708160ebf 100644
--- a/net/ieee802154/sysfs.c
+++ b/net/ieee802154/sysfs.c
@@ -93,7 +93,7 @@ static SIMPLE_DEV_PM_OPS(wpan_phy_pm_ops, wpan_phy_suspend, wpan_phy_resume);
#define WPAN_PHY_PM_OPS NULL
#endif
-struct class wpan_phy_class = {
+const struct class wpan_phy_class = {
.name = "ieee802154",
.dev_release = wpan_phy_release,
.dev_groups = pmib_groups,
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index 337545b639..69961e1662 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -5,6 +5,6 @@
int wpan_phy_sysfs_init(void);
void wpan_phy_sysfs_exit(void);
-extern struct class wpan_phy_class;
+extern const struct class wpan_phy_class;
#endif /* __IEEE802154_SYSFS_H */
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 62aa646525..591ce0a16f 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -75,7 +75,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf,
),
TP_fast_assign(
WPAN_PHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
__entry->extended_addr = extended_addr;
),
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a5a820ee20..b24d746166 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,6 +119,7 @@
#endif
#include <net/l3mdev.h>
#include <net/compat.h>
+#include <net/rps.h>
#include <trace/events/sock.h>
@@ -757,7 +758,9 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
sock_rps_record_flow(newsk);
WARN_ON(!((1 << newsk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
- TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
+ TCPF_CLOSING | TCPF_CLOSE_WAIT |
+ TCPF_CLOSE)));
if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
@@ -770,16 +773,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
-int inet_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int inet_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk1 = sock->sk, *sk2;
- int err = -EINVAL;
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
- sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
+ arg->err = -EINVAL;
+ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg);
if (!sk2)
- return err;
+ return arg->err;
lock_sock(sk2);
__inet_accept(sock, newsock, sk2);
@@ -1071,6 +1074,7 @@ const struct proto_ops inet_stream_ops = {
#endif
.splice_eof = inet_splice_eof,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.sendmsg_locked = tcp_sendmsg_locked,
@@ -1103,7 +1107,7 @@ const struct proto_ops inet_dgram_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.splice_eof = inet_splice_eof,
- .set_peek_off = sk_set_peek_off,
+ .set_peek_off = udp_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet_compat_ioctl,
#endif
@@ -1305,8 +1309,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
int inet_sk_rebuild_header(struct sock *sk)
{
+ struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -1326,7 +1330,7 @@ int inet_sk_rebuild_header(struct sock *sk)
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (!IS_ERR(rt)) {
err = 0;
@@ -1480,7 +1484,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
struct sk_buff *p;
unsigned int hlen;
unsigned int off;
- unsigned int id;
int flush = 1;
int proto;
@@ -1506,13 +1509,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
goto out;
NAPI_GRO_CB(skb)->proto = proto;
- id = ntohl(*(__be32 *)&iph->id);
- flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
- id >>= 16;
+ flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
list_for_each_entry(p, head, list) {
struct iphdr *iph2;
- u16 flush_id;
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -1529,48 +1529,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
-
- /* All fields must match except length and checksum. */
- NAPI_GRO_CB(p)->flush |=
- (iph->ttl ^ iph2->ttl) |
- (iph->tos ^ iph2->tos) |
- ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
-
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* We need to store of the IP ID check to be included later
- * when we can verify that this packet does in fact belong
- * to a given flow.
- */
- flush_id = (u16)(id - ntohs(iph2->id));
-
- /* This bit of code makes it much easier for us to identify
- * the cases where we are doing atomic vs non-atomic IP ID
- * checks. Specifically an atomic check can return IP ID
- * values 0 - 0xFFFF, while a non-atomic check can only
- * return 0 or 0xFFFF.
- */
- if (!NAPI_GRO_CB(p)->is_atomic ||
- !(iph->frag_off & htons(IP_DF))) {
- flush_id ^= NAPI_GRO_CB(p)->count;
- flush_id = flush_id ? 0xFFFF : 0;
- }
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = flush_id;
- else
- NAPI_GRO_CB(p)->flush_id |= flush_id;
}
- NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
NAPI_GRO_CB(skb)->flush |= flush;
- skb_set_network_header(skb, off);
- /* The above will be needed by the transport layer if there is one
- * immediately following this IP hdr.
- */
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
@@ -1751,19 +1713,6 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
- .no_policy = 1,
- .icmp_strict_tag_validation = 1,
-};
-
-static const struct net_protocol udp_protocol = {
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
-};
-
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = icmp_err,
@@ -1904,14 +1853,6 @@ static int ipv4_proc_init(void);
* IP protocol layer initialiser
*/
-static struct packet_offload ip_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IP),
- .callbacks = {
- .gso_segment = inet_gso_segment,
- .gro_receive = inet_gro_receive,
- .gro_complete = inet_gro_complete,
- },
-};
static const struct net_offload ipip_offload = {
.callbacks = {
@@ -1938,7 +1879,15 @@ static int __init ipv4_offload_init(void)
if (ipip_offload_init() < 0)
pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
- dev_add_offload(&ip_packet_offload);
+ net_hotdata.ip_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IP),
+ .callbacks = {
+ .gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ip_packet_offload);
return 0;
}
@@ -1992,9 +1941,22 @@ static int __init inet_init(void)
if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
pr_crit("%s: Cannot add ICMP protocol\n", __func__);
- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
+
+ net_hotdata.udp_protocol = (struct net_protocol) {
+ .handler = udp_rcv,
+ .err_handler = udp_err,
+ .no_policy = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
pr_crit("%s: Cannot add UDP protocol\n", __func__);
- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
+
+ net_hotdata.tcp_protocol = (struct net_protocol) {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
+ .no_policy = 1,
+ .icmp_strict_tag_validation = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0d0d725b46..11c1519b36 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
/*unsigned long now; */
struct net *net = dev_net(dev);
- rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
+ rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev),
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
@@ -1002,6 +1003,55 @@ out_of_mem:
* User level interface (ioctl)
*/
+static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r,
+ bool getarp)
+{
+ struct net_device *dev;
+
+ if (getarp)
+ dev = dev_get_by_name_rcu(net, r->arp_dev);
+ else
+ dev = __dev_get_by_name(net, r->arp_dev);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* Mmmm... It is wrong... ARPHRD_NETROM == 0 */
+ if (!r->arp_ha.sa_family)
+ r->arp_ha.sa_family = dev->type;
+
+ if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
+static struct net_device *arp_req_dev(struct net *net, struct arpreq *r)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ __be32 ip;
+
+ if (r->arp_dev[0])
+ return arp_req_dev_by_name(net, r, false);
+
+ if (r->arp_flags & ATF_PUBL)
+ return NULL;
+
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
+ rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ ip_rt_put(rt);
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
/*
* Set (create) an ARP cache entry.
*/
@@ -1022,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != htonl(0xFFFFFFFF))
- return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
@@ -1034,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -ENODEV;
}
if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
return -ENOBUFS;
return 0;
@@ -1042,29 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return arp_req_set_proxy(net, dev, 1);
}
-static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_set(struct net *net, struct arpreq *r)
{
- __be32 ip;
struct neighbour *neigh;
+ struct net_device *dev;
+ __be32 ip;
int err;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_set_public(net, r, dev);
- ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (r->arp_flags & ATF_PERM)
- r->arp_flags |= ATF_COM;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
-
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
switch (dev->type) {
#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
@@ -1086,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r,
break;
}
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
unsigned int state = NUD_STALE;
- if (r->arp_flags & ATF_PERM)
+
+ if (r->arp_flags & ATF_PERM) {
+ r->arp_flags |= ATF_COM;
state = NUD_PERMANENT;
+ }
+
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
@@ -1115,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh)
* Get an ARP cache entry.
*/
-static int arp_req_get(struct arpreq *r, struct net_device *dev)
+static int arp_req_get(struct net *net, struct arpreq *r)
{
__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
- int err = -ENXIO;
+ struct net_device *dev;
+
+ if (!r->arp_dev[0])
+ return -ENODEV;
+
+ dev = arp_req_dev_by_name(net, r, true);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha,
- min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
- err = 0;
- }
+ if (!neigh)
+ return -ENXIO;
+
+ if (READ_ONCE(neigh->nud_state) & NUD_NOARP) {
neigh_release(neigh);
+ return -ENXIO;
}
- return err;
+
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+
+ r->arp_ha.sa_family = dev->type;
+ netdev_copy_name(dev, r->arp_dev);
+
+ return 0;
}
int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
@@ -1166,36 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == htonl(0xFFFFFFFF))
- return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (mask)
- return -EINVAL;
+ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ }
return arp_req_set_proxy(net, dev, 0);
}
-static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_delete(struct net *net, struct arpreq *r)
{
+ struct net_device *dev;
__be32 ip;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
+
return arp_invalidate(dev, ip, true);
}
@@ -1205,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
- int err;
struct arpreq r;
- struct net_device *dev = NULL;
+ __be32 *netmask;
+ int err;
switch (cmd) {
case SIOCDARP:
@@ -1230,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_PUBL) &&
(r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
+
+ netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
- htonl(0xFFFFFFFFUL);
- rtnl_lock();
- if (r.arp_dev[0]) {
- err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
- if (!dev)
- goto out;
-
- /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
- if (!r.arp_ha.sa_family)
- r.arp_ha.sa_family = dev->type;
- err = -EINVAL;
- if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- goto out;
- } else if (cmd == SIOCGARP) {
- err = -ENODEV;
- goto out;
- }
+ *netmask = htonl(0xFFFFFFFFUL);
+ else if (*netmask && *netmask != htonl(0xFFFFFFFFUL))
+ return -EINVAL;
switch (cmd) {
case SIOCDARP:
- err = arp_req_delete(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_delete(net, &r);
+ rtnl_unlock();
break;
case SIOCSARP:
- err = arp_req_set(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_set(net, &r);
+ rtnl_unlock();
break;
case SIOCGARP:
- err = arp_req_get(&r, dev);
+ rcu_read_lock();
+ err = arp_req_get(net, &r);
+ rcu_read_unlock();
+
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
break;
}
-out:
- rtnl_unlock();
- if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
+
return err;
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index ae8b15e689..18227757ec 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -12,7 +12,7 @@
#include <net/bpf_sk_storage.h>
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+static struct bpf_struct_ops bpf_tcp_congestion_ops;
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
@@ -20,6 +20,7 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
+static const struct btf_type *tcp_congestion_ops_type;
static int bpf_tcp_ca_init(struct btf *btf)
{
@@ -36,6 +37,11 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+ type_id = btf_find_by_name_kind(btf, "tcp_congestion_ops", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ tcp_congestion_ops_type = btf_type_by_id(btf, type_id);
+
return 0;
}
@@ -101,6 +107,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
case offsetof(struct tcp_sock, snd_cwnd_cnt):
end = offsetofend(struct tcp_sock, snd_cwnd_cnt);
break;
+ case offsetof(struct tcp_sock, snd_cwnd_stamp):
+ end = offsetofend(struct tcp_sock, snd_cwnd_stamp);
+ break;
case offsetof(struct tcp_sock, snd_ssthresh):
end = offsetofend(struct tcp_sock, snd_ssthresh);
break;
@@ -149,7 +158,7 @@ static u32 prog_ops_moff(const struct bpf_prog *prog)
u32 midx;
midx = prog->expected_attach_type;
- t = bpf_tcp_congestion_ops.type;
+ t = tcp_congestion_ops_type;
m = &btf_type_member(t)[midx];
return __btf_member_bit_offset(t, m) / 8;
@@ -191,17 +200,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
-BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids)
+BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids)
BTF_ID_FLAGS(func, tcp_reno_ssthresh)
BTF_ID_FLAGS(func, tcp_reno_cong_avoid)
BTF_ID_FLAGS(func, tcp_reno_undo_cwnd)
BTF_ID_FLAGS(func, tcp_slow_start)
BTF_ID_FLAGS(func, tcp_cong_avoid_ai)
-BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids)
+BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids)
static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
.owner = THIS_MODULE,
@@ -301,7 +310,8 @@ static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
return 0;
}
-static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
+ const struct rate_sample *rs)
{
}
@@ -339,7 +349,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
.release = __bpf_tcp_ca_release,
};
-struct bpf_struct_ops bpf_tcp_congestion_ops = {
+static struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
@@ -350,10 +360,16 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
.cfi_stubs = &__bpf_ops_tcp_congestion_ops,
+ .owner = THIS_MODULE,
};
static int __init bpf_tcp_ca_kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_tcp_congestion_ops, tcp_congestion_ops);
+
+ return ret;
}
late_initcall(bpf_tcp_ca_kfunc_init);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index d048aa8332..e9cb27061c 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -864,11 +864,8 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
net_clen_bits,
net_spot + 1,
1);
- if (net_spot < 0) {
- if (net_spot == -2)
- return -EFAULT;
+ if (net_spot < 0)
return 0;
- }
switch (doi_def->type) {
case CIPSO_V4_MAP_PASS:
@@ -1813,11 +1810,35 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
return CIPSO_V4_HDR_LEN + ret_val;
}
+static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len)
+{
+ int iter = 0, optlen = 0;
+
+ /* determining the new total option length is tricky because of
+ * the padding necessary, the only thing i can think to do at
+ * this point is walk the options one-by-one, skipping the
+ * padding at the end to determine the actual option size and
+ * from there we can determine the new total option length
+ */
+ while (iter < len) {
+ if (data[iter] == IPOPT_END) {
+ break;
+ } else if (data[iter] == IPOPT_NOP) {
+ iter++;
+ } else {
+ iter += data[iter + 1];
+ optlen = iter;
+ }
+ }
+ return optlen;
+}
+
/**
* cipso_v4_sock_setattr - Add a CIPSO option to a socket
* @sk: the socket
* @doi_def: the CIPSO DOI to use
* @secattr: the specific security attributes of the socket
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Set the CIPSO option on the given socket using the DOI definition and
@@ -1829,7 +1850,8 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
*/
int cipso_v4_sock_setattr(struct sock *sk,
const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val = -EPERM;
unsigned char *buf = NULL;
@@ -1879,8 +1901,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
sk_inet = inet_sk(sk);
- old = rcu_dereference_protected(sk_inet->inet_opt,
- lockdep_sock_is_held(sk));
+ old = rcu_dereference_protected(sk_inet->inet_opt, sk_locked);
if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
@@ -1988,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
u8 cipso_len;
u8 cipso_off;
unsigned char *cipso_ptr;
- int iter;
int optlen_new;
cipso_off = opt->opt.cipso - sizeof(struct iphdr);
@@ -2008,19 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
memmove(cipso_ptr, cipso_ptr + cipso_len,
opt->opt.optlen - cipso_off - cipso_len);
- /* determining the new total option length is tricky because of
- * the padding necessary, the only thing i can think to do at
- * this point is walk the options one-by-one, skipping the
- * padding at the end to determine the actual option size and
- * from there we can determine the new total option length */
- iter = 0;
- optlen_new = 0;
- while (iter < opt->opt.optlen)
- if (opt->opt.__data[iter] != IPOPT_NOP) {
- iter += opt->opt.__data[iter + 1];
- optlen_new = iter;
- } else
- iter++;
+ optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data,
+ opt->opt.optlen);
hdr_delta = opt->opt.optlen;
opt->opt.optlen = (optlen_new + 3) & ~3;
hdr_delta -= opt->opt.optlen;
@@ -2240,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
*/
int cipso_v4_skbuff_delattr(struct sk_buff *skb)
{
- int ret_val;
+ int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len,
+ hdr_len_delta;
struct iphdr *iph;
struct ip_options *opt = &IPCB(skb)->opt;
unsigned char *cipso_ptr;
@@ -2253,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb)
if (ret_val < 0)
return ret_val;
- /* the easiest thing to do is just replace the cipso option with noop
- * options since we don't change the size of the packet, although we
- * still need to recalculate the checksum */
-
iph = ip_hdr(skb);
cipso_ptr = (unsigned char *)iph + opt->cipso;
- memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+ cipso_len = cipso_ptr[1];
+
+ hdr_len_actual = sizeof(struct iphdr) +
+ cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1),
+ opt->optlen);
+ new_hdr_len_actual = hdr_len_actual - cipso_len;
+ new_hdr_len = (new_hdr_len_actual + 3) & ~3;
+ hdr_len_delta = (iph->ihl << 2) - new_hdr_len;
+
+ /* 1. shift any options after CIPSO to the left */
+ memmove(cipso_ptr, cipso_ptr + cipso_len,
+ new_hdr_len_actual - opt->cipso);
+ /* 2. move the whole IP header to its new place */
+ memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual);
+ /* 3. adjust the skb layout */
+ skb_pull(skb, hdr_len_delta);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ /* 4. re-fill new padding with IPOPT_END (may now be longer) */
+ memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END,
+ new_hdr_len - new_hdr_len_actual);
+
+ opt->optlen -= hdr_len_delta;
opt->cipso = 0;
opt->is_changed = 1;
-
+ if (hdr_len_delta != 0) {
+ iph->ihl = new_hdr_len >> 2;
+ iph_set_totlen(iph, skb->len);
+ }
ip_send_check(iph);
return 0;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 2cc50cbfc2..cc6d0bd7b0 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -119,7 +119,7 @@ void ip4_datagram_release_cb(struct sock *sk)
rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
dst = !IS_ERR(rt) ? &rt->dst : NULL;
sk_dst_set(sk, dst);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc74f131fe..d09f557eaa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -224,6 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(void)
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
+
if (ifa->ifa_dev)
in_dev_put(ifa->ifa_dev);
kfree(ifa);
@@ -231,7 +232,11 @@ static void inet_rcu_free_ifa(struct rcu_head *head)
static void inet_free_ifa(struct in_ifaddr *ifa)
{
- call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
+ /* Our reference to ifa->ifa_dev must be freed ASAP
+ * to release the reference to the netdev the same way.
+ * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
+ */
+ call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
}
static void in_dev_free_rcu(struct rcu_head *head)
@@ -713,34 +718,37 @@ static void check_lifetime(struct work_struct *work)
rcu_read_lock();
hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
- unsigned long age;
+ unsigned long age, tstamp;
+ u32 preferred_lft;
+ u32 valid_lft;
+ u32 flags;
- if (ifa->ifa_flags & IFA_F_PERMANENT)
+ flags = READ_ONCE(ifa->ifa_flags);
+ if (flags & IFA_F_PERMANENT)
continue;
+ preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
+ valid_lft = READ_ONCE(ifa->ifa_valid_lft);
+ tstamp = READ_ONCE(ifa->ifa_tstamp);
/* We try to batch several events at once. */
- age = (now - ifa->ifa_tstamp +
+ age = (now - tstamp +
ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
- if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
- age >= ifa->ifa_valid_lft) {
+ if (valid_lft != INFINITY_LIFE_TIME &&
+ age >= valid_lft) {
change_needed = true;
- } else if (ifa->ifa_preferred_lft ==
+ } else if (preferred_lft ==
INFINITY_LIFE_TIME) {
continue;
- } else if (age >= ifa->ifa_preferred_lft) {
- if (time_before(ifa->ifa_tstamp +
- ifa->ifa_valid_lft * HZ, next))
- next = ifa->ifa_tstamp +
- ifa->ifa_valid_lft * HZ;
+ } else if (age >= preferred_lft) {
+ if (time_before(tstamp + valid_lft * HZ, next))
+ next = tstamp + valid_lft * HZ;
- if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
+ if (!(flags & IFA_F_DEPRECATED))
change_needed = true;
- } else if (time_before(ifa->ifa_tstamp +
- ifa->ifa_preferred_lft * HZ,
+ } else if (time_before(tstamp + preferred_lft * HZ,
next)) {
- next = ifa->ifa_tstamp +
- ifa->ifa_preferred_lft * HZ;
+ next = tstamp + preferred_lft * HZ;
}
}
rcu_read_unlock();
@@ -804,24 +812,26 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
__u32 prefered_lft)
{
unsigned long timeout;
+ u32 flags;
- ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+ flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout))
- ifa->ifa_valid_lft = timeout;
+ WRITE_ONCE(ifa->ifa_valid_lft, timeout);
else
- ifa->ifa_flags |= IFA_F_PERMANENT;
+ flags |= IFA_F_PERMANENT;
timeout = addrconf_timeout_fixup(prefered_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
if (timeout == 0)
- ifa->ifa_flags |= IFA_F_DEPRECATED;
- ifa->ifa_preferred_lft = timeout;
+ flags |= IFA_F_DEPRECATED;
+ WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
}
- ifa->ifa_tstamp = jiffies;
+ WRITE_ONCE(ifa->ifa_flags, flags);
+ WRITE_ONCE(ifa->ifa_tstamp, jiffies);
if (!ifa->ifa_cstamp)
- ifa->ifa_cstamp = ifa->ifa_tstamp;
+ WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
}
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
@@ -1312,7 +1322,7 @@ static __be32 in_dev_select_addr(const struct in_device *in_dev,
const struct in_ifaddr *ifa;
in_dev_for_each_ifa_rcu(ifa, in_dev) {
- if (ifa->ifa_flags & IFA_F_SECONDARY)
+ if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
continue;
if (ifa->ifa_scope != RT_SCOPE_LINK &&
ifa->ifa_scope <= scope)
@@ -1340,7 +1350,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
localnet_scope = RT_SCOPE_LINK;
in_dev_for_each_ifa_rcu(ifa, in_dev) {
- if (ifa->ifa_flags & IFA_F_SECONDARY)
+ if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
continue;
if (min(ifa->ifa_scope, localnet_scope) > scope)
continue;
@@ -1671,12 +1681,14 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
}
-static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
+static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
struct inet_fill_args *args)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
+ unsigned long tstamp;
u32 preferred, valid;
+ u32 flags;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
args->flags);
@@ -1686,7 +1698,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = ifa->ifa_flags;
+
+ flags = READ_ONCE(ifa->ifa_flags);
+ /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
+ * The 32bit value is given in IFA_FLAGS attribute.
+ */
+ ifm->ifa_flags = (__u8)flags;
+
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
@@ -1694,11 +1712,12 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto nla_put_failure;
- if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
- preferred = ifa->ifa_preferred_lft;
- valid = ifa->ifa_valid_lft;
+ tstamp = READ_ONCE(ifa->ifa_tstamp);
+ if (!(flags & IFA_F_PERMANENT)) {
+ preferred = READ_ONCE(ifa->ifa_preferred_lft);
+ valid = READ_ONCE(ifa->ifa_valid_lft);
if (preferred != INFINITY_LIFE_TIME) {
- long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+ long tval = (jiffies - tstamp) / HZ;
if (preferred > tval)
preferred -= tval;
@@ -1725,10 +1744,10 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
(ifa->ifa_proto &&
nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
- nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ nla_put_u32(skb, IFA_FLAGS, flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
- put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+ put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
preferred, valid))
goto nla_put_failure;
@@ -1798,15 +1817,15 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
}
static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
- struct netlink_callback *cb, int s_ip_idx,
+ struct netlink_callback *cb, int *s_ip_idx,
struct inet_fill_args *fillargs)
{
struct in_ifaddr *ifa;
int ip_idx = 0;
int err;
- in_dev_for_each_ifa_rtnl(ifa, in_dev) {
- if (ip_idx < s_ip_idx) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ip_idx < *s_ip_idx) {
ip_idx++;
continue;
}
@@ -1818,9 +1837,9 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
ip_idx++;
}
err = 0;
-
+ ip_idx = 0;
done:
- cb->args[2] = ip_idx;
+ *s_ip_idx = ip_idx;
return err;
}
@@ -1830,7 +1849,7 @@ done:
static u32 inet_base_seq(const struct net *net)
{
u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
- net->dev_base_seq;
+ READ_ONCE(net->dev_base_seq);
/* Must not return 0 (see nl_dump_check_consistent()).
* Chose a value far away from 0.
@@ -1852,75 +1871,52 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
};
struct net *net = sock_net(skb->sk);
struct net *tgt_net = net;
- int h, s_h;
- int idx, s_idx;
- int s_ip_idx;
- struct net_device *dev;
+ struct {
+ unsigned long ifindex;
+ int ip_idx;
+ } *ctx = (void *)cb->ctx;
struct in_device *in_dev;
- struct hlist_head *head;
+ struct net_device *dev;
int err = 0;
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- s_ip_idx = cb->args[2];
-
+ rcu_read_lock();
if (cb->strict_check) {
err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- goto put_tgt_net;
+ goto done;
- err = 0;
if (fillargs.ifindex) {
- dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+ dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
if (!dev) {
err = -ENODEV;
- goto put_tgt_net;
- }
-
- in_dev = __in_dev_get_rtnl(dev);
- if (in_dev) {
- err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
- &fillargs);
+ goto done;
}
- goto put_tgt_net;
- }
- }
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = inet_base_seq(tgt_net);
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (h > s_h || idx > s_idx)
- s_ip_idx = 0;
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
- goto cont;
-
- err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
- &fillargs);
- if (err < 0) {
- rcu_read_unlock();
goto done;
- }
-cont:
- idx++;
+ err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ goto done;
}
- rcu_read_unlock();
}
+ cb->seq = inet_base_seq(tgt_net);
+
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ continue;
+ err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ if (err < 0)
+ goto done;
+ }
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
-
- return skb->len ? : err;
+ rcu_read_unlock();
+ return err;
}
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
@@ -1982,7 +1978,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
return -EMSGSIZE;
for (i = 0; i < IPV4_DEVCONF_MAX; i++)
- ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+ ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
return 0;
}
@@ -2068,9 +2064,9 @@ static int inet_netconf_msgsize_devconf(int type)
}
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
- struct ipv4_devconf *devconf, u32 portid,
- u32 seq, int event, unsigned int flags,
- int type)
+ const struct ipv4_devconf *devconf,
+ u32 portid, u32 seq, int event,
+ unsigned int flags, int type)
{
struct nlmsghdr *nlh;
struct netconfmsg *ncm;
@@ -2095,27 +2091,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
- IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_RP_FILTER) &&
nla_put_s32(skb, NETCONFA_RP_FILTER,
- IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
+ IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_BC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_BC_FORWARDING,
- IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_PROXY_NEIGH) &&
nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
- IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+ IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+ IPV4_DEVCONF_RO(*devconf,
+ IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
out:
@@ -2204,21 +2201,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
- struct nlattr *tb[NETCONFA_MAX+1];
+ struct nlattr *tb[NETCONFA_MAX + 1];
+ const struct ipv4_devconf *devconf;
+ struct in_device *in_dev = NULL;
+ struct net_device *dev = NULL;
struct sk_buff *skb;
- struct ipv4_devconf *devconf;
- struct in_device *in_dev;
- struct net_device *dev;
int ifindex;
int err;
err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
if (err)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
@@ -2229,10 +2225,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv4.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- goto errout;
- in_dev = __in_dev_get_rtnl(dev);
+ err = -ENODEV;
+ dev = dev_get_by_index(net, ifindex);
+ if (dev)
+ in_dev = in_dev_get(dev);
if (!in_dev)
goto errout;
devconf = &in_dev->cnf;
@@ -2256,6 +2252,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in_dev)
+ in_dev_put(in_dev);
+ dev_put(dev);
return err;
}
@@ -2264,11 +2263,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
+ const struct in_device *in_dev;
struct net_device *dev;
- struct in_device *in_dev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -2285,64 +2286,45 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = inet_base_seq(net);
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- in_dev = __in_dev_get_rcu(dev);
- if (!in_dev)
- goto cont;
-
- if (inet_netconf_fill_devconf(skb, dev->ifindex,
- &in_dev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ continue;
+ err = inet_netconf_fill_devconf(skb, dev->ifindex,
+ &in_dev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv4.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv4.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv4.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -2546,7 +2528,7 @@ static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
+ struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
.devinet_vars = {
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -2609,7 +2591,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
if (!t)
goto out;
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
t->devinet_vars[i].extra1 = p;
t->devinet_vars[i].extra2 = net;
@@ -2683,7 +2665,6 @@ static struct ctl_table ctl_forward_entry[] = {
.extra1 = &ipv4_devconf,
.extra2 = &init_net,
},
- { },
};
#endif
@@ -2780,7 +2761,7 @@ err_alloc_all:
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
@@ -2823,7 +2804,9 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf, 0);
+ inet_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d33d124218..619a4df7be 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -20,6 +20,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/espintcp.h>
+#include <linux/skbuff_ref.h>
#include <linux/highmem.h>
@@ -114,7 +115,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(skb, sg_page(sg), false);
+ skb_page_unref(sg_page(sg), skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
@@ -238,8 +239,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
@@ -347,7 +347,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -362,12 +361,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -423,7 +416,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -775,7 +767,6 @@ int esp_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1179,9 +1170,6 @@ static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index b3271957ad..3f28ecbdca 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -56,6 +56,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 390f4be7f7..7ad2cafb92 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -916,7 +916,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct rtmsg *rtm;
int err, i;
- ASSERT_RTNL();
+ if (filter->rtnl_held)
+ ASSERT_RTNL();
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
@@ -961,7 +962,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
- filter->dev = __dev_get_by_index(net, ifindex);
+ if (filter->rtnl_held)
+ filter->dev = __dev_get_by_index(net, ifindex);
+ else
+ filter->dev = dev_get_by_index_rcu(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
@@ -983,20 +987,24 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = { .dump_routes = true,
- .dump_exceptions = true };
+ struct fib_dump_filter filter = {
+ .dump_routes = true,
+ .dump_exceptions = true,
+ .rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0, err;
+ int dumped = 0, err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -1005,29 +1013,26 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* ipv4 does not use prefix flag */
if (filter.flags & RTM_F_PREFIX)
- return skb->len;
+ goto unlock;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET)
- return skb->len;
+ goto unlock;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
-
- rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
- rcu_read_unlock();
- return skb->len ? : err;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
-
+ err = 0;
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
@@ -1038,25 +1043,20 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
err = fib_table_dump(tb, skb, cb, &filter);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
+ if (err < 0)
+ goto out;
dumped = 1;
next:
e++;
}
}
out:
- err = skb->len;
-out_err:
- rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
+unlock:
+ rcu_read_unlock();
return err;
}
@@ -1659,5 +1659,6 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5eb1b8d302..8956026bc0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -254,7 +254,7 @@ void free_fib_info(struct fib_info *fi)
return;
}
- call_rcu(&fi->rcu, free_fib_info_rcu);
+ call_rcu_hurry(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);
@@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
fib_select_default(fl4, res);
check_saddr:
- if (!fl4->saddr)
- fl4->saddr = fib_result_prefsrc(net, res);
+ if (!fl4->saddr) {
+ struct net_device *l3mdev;
+
+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
+
+ if (!l3mdev ||
+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
+ fl4->saddr = fib_result_prefsrc(net, res);
+ else
+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
+ }
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3ff35f8117..8f30e3f00b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -501,7 +501,7 @@ static void tnode_free(struct key_vector *tn)
if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
- synchronize_rcu();
+ synchronize_net();
}
}
@@ -1629,6 +1629,7 @@ set_result:
res->nhc = nhc;
res->type = fa->fa_type;
res->scope = fi->fib_scope;
+ res->dscp = fa->fa_dscp;
res->fi = fi;
res->table = tb;
res->fa_head = &n->leaf;
@@ -2368,7 +2369,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
* and key == 0 means the dump has wrapped around and we are done.
*/
if (count && !key)
- return skb->len;
+ return 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;
@@ -2394,7 +2395,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
cb->args[3] = key;
cb->args[2] = count;
- return skb->len;
+ return 0;
}
void __init fib_trie_init(void)
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
index 4da03bf45c..54984f3170 100644
--- a/net/ipv4/fou_bpf.c
+++ b/net/ipv4/fou_bpf.c
@@ -64,7 +64,7 @@ __bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
info->encap.type = TUNNEL_ENCAP_NONE;
}
- if (info->key.tun_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM;
info->encap.sport = encap->sport;
@@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
__bpf_kfunc_end_defs();
-BTF_SET8_START(fou_kfunc_set)
+BTF_KFUNCS_START(fou_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_set_fou_encap)
BTF_ID_FLAGS(func, bpf_skb_get_fou_encap)
-BTF_SET8_END(fou_kfunc_set)
+BTF_KFUNCS_END(fou_kfunc_set)
static const struct btf_kfunc_id_set fou_bpf_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 0c41076e31..0abbc413e0 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -351,7 +351,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
optlen = guehdr->hlen << 2;
len += optlen;
- if (skb_gro_header_hard(skb, len)) {
+ if (!skb_gro_may_pull(skb, len)) {
guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
@@ -433,7 +433,7 @@ next_proto:
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
+ if (!ops || !ops->callbacks.gro_receive)
goto out;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index cbb2b4bb0d..6701a98d9a 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -73,7 +73,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ gre_flags_to_tnl_flags(tpi->flags, greh->flags);
hdr_len = gre_calc_hlen(tpi->flags);
if (!pskb_may_pull(skb, nhs + hdr_len))
@@ -217,5 +217,5 @@ module_init(gre_init);
module_exit(gre_exit);
MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 311e70bfce..5028c72d49 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
grehlen += GRE_HEADER_SECTION;
hlen = off + grehlen;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
greh = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!greh))
goto out;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 437e782b96..ab6d0d98db 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,8 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/icmp.h>
/*
* Build xmit assembly blocks
@@ -483,6 +485,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
+ struct dst_entry *dst, *dst2;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -508,16 +511,17 @@ static struct rtable *icmp_route_lookup(struct net *net,
/* No need to clone since we're just using its address. */
rt2 = rt;
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(fl4), NULL, 0);
- if (!IS_ERR(rt)) {
+ dst = xfrm_lookup(net, &rt->dst,
+ flowi4_to_flowi(fl4), NULL, 0);
+ rt = dst_rtable(dst);
+ if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
- } else if (PTR_ERR(rt) == -EPERM) {
+ } else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
- } else
+ } else {
return rt;
-
+ }
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
@@ -551,19 +555,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
- flowi4_to_flowi(&fl4_dec), NULL,
- XFRM_LOOKUP_ICMP);
- if (!IS_ERR(rt2)) {
+ dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL,
+ XFRM_LOOKUP_ICMP);
+ rt2 = dst_rtable(dst2);
+ if (!IS_ERR(dst2)) {
dst_release(&rt->dst);
memcpy(fl4, &fl4_dec, sizeof(*fl4));
rt = rt2;
- } else if (PTR_ERR(rt2) == -EPERM) {
+ } else if (PTR_ERR(dst2) == -EPERM) {
if (rt)
dst_release(&rt->dst);
return rt2;
} else {
- err = PTR_ERR(rt2);
+ err = PTR_ERR(dst2);
goto relookup_failed;
}
return rt;
@@ -768,6 +772,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
+ trace_icmp_send(skb_in, type, code);
+
icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index efeeca2b13..9bf09de6a2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -120,12 +120,12 @@
*/
#define IGMP_V1_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
((in_dev)->mr_v1_seen && \
time_before(jiffies, (in_dev)->mr_v1_seen)))
#define IGMP_V2_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
((in_dev)->mr_v2_seen && \
time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
if (!dev) {
struct rtable *rt = ip_route_output(net,
imr->imr_multiaddr.s_addr,
- 0, 0, 0);
+ 0, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
dev = rt->dst.dev;
ip_rt_put(rt);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d1492c649a..d4f0eff8b2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -661,7 +661,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
/*
* This will accept the next outstanding connection.
*/
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
+struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
@@ -680,7 +680,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
/* Find already established connection */
if (reqsk_queue_empty(queue)) {
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
@@ -692,6 +692,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
goto out_err;
}
req = reqsk_queue_remove(queue, sk);
+ arg->is_empty = reqsk_queue_empty(queue);
newsk = req->sk;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -745,7 +746,7 @@ out:
out_err:
newsk = NULL;
req = NULL;
- *err = error;
+ arg->err = error;
goto out;
}
EXPORT_SYMBOL(inet_csk_accept);
@@ -930,8 +931,9 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
memcpy(nreq_sk, req_sk,
offsetof(struct sock, sk_dontcopy_begin));
- memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
- req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+ unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+ req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end),
+ /* alloc is larger than struct, see above */);
sk_node_init(&nreq_sk->sk_node);
nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
@@ -1120,25 +1122,34 @@ drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
+ bool found_dup_sk = false;
+
+ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
+ return false;
+
+ /* The timer needs to be setup after a successful insertion. */
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+ return true;
}
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
- reqsk_queue_hash_req(req, timeout);
+ if (!reqsk_queue_hash_req(req, timeout))
+ return false;
+
inet_csk_reqsk_queue_added(sk);
+ return true;
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
@@ -1491,7 +1502,7 @@ static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *f
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
if (IS_ERR(rt))
rt = NULL;
if (rt)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9804e9608a..9712cdb808 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -32,7 +32,7 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct inet_diag_handler **inet_diag_table;
+static const struct inet_diag_handler __rcu **inet_diag_table;
struct inet_diag_entry {
const __be32 *saddr;
@@ -48,28 +48,28 @@ struct inet_diag_entry {
#endif
};
-static DEFINE_MUTEX(inet_diag_table_mutex);
-
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (proto < 0 || proto >= IPPROTO_MAX) {
- mutex_lock(&inet_diag_table_mutex);
- return ERR_PTR(-ENOENT);
- }
+ const struct inet_diag_handler *handler;
+
+ if (proto < 0 || proto >= IPPROTO_MAX)
+ return NULL;
if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
- mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[proto])
- return ERR_PTR(-ENOENT);
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[proto]);
+ if (handler && !try_module_get(handler->owner))
+ handler = NULL;
+ rcu_read_unlock();
- return inet_diag_table[proto];
+ return handler;
}
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
- mutex_unlock(&inet_diag_table_mutex);
+ module_put(handler->owner);
}
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
@@ -104,9 +104,12 @@ static size_t inet_sk_attr_size(struct sock *sk,
const struct inet_diag_handler *handler;
size_t aux = 0;
- handler = inet_diag_table[req->sdiag_protocol];
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
if (handler && handler->idiag_get_aux_size)
aux = handler->idiag_get_aux_size(sk, net_admin);
+ rcu_read_unlock();
return nla_total_size(sizeof(struct tcp_info))
+ nla_total_size(sizeof(struct inet_diag_msg))
@@ -244,10 +247,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ int protocol;
cb_data = cb->data;
- handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)];
- BUG_ON(!handler);
+ protocol = inet_diag_get_protocol(req, cb_data);
+
+ /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
+ handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
+ if (!handler)
+ return -ENXIO;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
@@ -605,9 +614,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
protocol = inet_diag_get_protocol(req, &dump_data);
handler = inet_diag_lock_handler(protocol);
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- } else if (cmd == SOCK_DIAG_BY_FAMILY) {
+ if (!handler)
+ return -ENOENT;
+
+ if (cmd == SOCK_DIAG_BY_FAMILY) {
struct netlink_callback cb = {
.nlh = nlh,
.skb = in_skb,
@@ -1035,6 +1045,10 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
num = 0;
ilb = &hashinfo->lhash2[i];
+ if (hlist_nulls_empty(&ilb->nulls_head)) {
+ s_num = 0;
+ continue;
+ }
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
@@ -1099,6 +1113,10 @@ resume_bind_walk:
accum = 0;
ibb = &hashinfo->bhash2[i];
+ if (hlist_empty(&ibb->chain)) {
+ s_num = 0;
+ continue;
+ }
spin_lock_bh(&ibb->lock);
inet_bind_bucket_for_each(tb2, &ibb->chain) {
if (!net_eq(ib2_net(tb2), net))
@@ -1259,12 +1277,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
again:
prev_min_dump_alloc = cb->min_dump_alloc;
handler = inet_diag_lock_handler(protocol);
- if (!IS_ERR(handler))
+ if (handler) {
handler->dump(skb, cb, r);
- else
- err = PTR_ERR(handler);
- inet_diag_unlock_handler(handler);
-
+ inet_diag_unlock_handler(handler);
+ } else {
+ err = -ENOENT;
+ }
/* The skb is not large enough to fit one sk info and
* inet_sk_diag_fill() has requested for a larger skb.
*/
@@ -1365,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
req.sdiag_family = AF_UNSPEC; /* compatibility */
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
@@ -1380,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.sdiag_family = rc->idiag_family;
req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
@@ -1457,10 +1477,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
handler = inet_diag_lock_handler(sk->sk_protocol);
- if (IS_ERR(handler)) {
- inet_diag_unlock_handler(handler);
+ if (!handler) {
nlmsg_cancel(skb, nlh);
- return PTR_ERR(handler);
+ return -ENOENT;
}
attr = handler->idiag_info_size
@@ -1479,6 +1498,7 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
static const struct sock_diag_handler inet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1486,6 +1506,7 @@ static const struct sock_diag_handler inet_diag_handler = {
};
static const struct sock_diag_handler inet6_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET6,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1495,20 +1516,12 @@ static const struct sock_diag_handler inet6_diag_handler = {
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- int err = -EINVAL;
if (type >= IPPROTO_MAX)
- goto out;
+ return -EINVAL;
- mutex_lock(&inet_diag_table_mutex);
- err = -EEXIST;
- if (!inet_diag_table[type]) {
- WRITE_ONCE(inet_diag_table[type], h);
- err = 0;
- }
- mutex_unlock(&inet_diag_table_mutex);
-out:
- return err;
+ return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL, h) ? 0 : -EEXIST;
}
EXPORT_SYMBOL_GPL(inet_diag_register);
@@ -1519,12 +1532,16 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
if (type >= IPPROTO_MAX)
return;
- mutex_lock(&inet_diag_table_mutex);
- WRITE_ONCE(inet_diag_table[type], NULL);
- mutex_unlock(&inet_diag_table_mutex);
+ xchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
+static const struct sock_diag_inet_compat inet_diag_compat = {
+ .owner = THIS_MODULE,
+ .fn = inet_diag_rcv_msg_compat,
+};
+
static int __init inet_diag_init(void)
{
const int inet_diag_table_size = (IPPROTO_MAX *
@@ -1543,7 +1560,7 @@ static int __init inet_diag_init(void)
if (err)
goto out_free_inet;
- sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_register_inet_compat(&inet_diag_compat);
out:
return err;
@@ -1558,7 +1575,7 @@ static void __exit inet_diag_exit(void)
{
sock_diag_unregister(&inet6_diag_handler);
sock_diag_unregister(&inet_diag_handler);
- sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_unregister_inet_compat(&inet_diag_compat);
kfree(inet_diag_table);
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c88c9034d6..faaec92a46 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -175,7 +175,7 @@ static void fqdir_free_fn(struct work_struct *work)
}
}
-static DECLARE_WORK(fqdir_free_work, fqdir_free_fn);
+static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn);
static void fqdir_work_fn(struct work_struct *work)
{
@@ -184,7 +184,7 @@ static void fqdir_work_fn(struct work_struct *work)
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
if (llist_add(&fqdir->free_list, &fqdir_free_list))
- queue_work(system_wq, &fqdir_free_work);
+ queue_delayed_work(system_wq, &fqdir_free_work, HZ);
}
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4e470f1848..48d0d49418 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,7 @@
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
@@ -32,8 +33,6 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
const __be16 fport)
{
- static u32 inet_ehash_secret __read_mostly;
-
net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
@@ -566,7 +565,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index e8de45d34d..e28075f000 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -264,14 +264,18 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+void inet_twsk_purge(struct inet_hashinfo *hashinfo)
{
+ struct inet_ehash_bucket *head = &hashinfo->ehash[0];
+ unsigned int ehash_mask = hashinfo->ehash_mask;
struct hlist_nulls_node *node;
unsigned int slot;
struct sock *sk;
- for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+ for (slot = 0; slot <= ehash_mask; slot++, head++) {
+ if (hlist_nulls_empty(&head->chain))
+ continue;
+
restart_rcu:
cond_resched();
rcu_read_lock();
@@ -283,15 +287,13 @@ restart:
TCPF_NEW_SYN_RECV))
continue;
- if (sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))
+ if (refcount_read(&sock_net(sk)->ns.count))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely(sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))) {
+ if (refcount_read(&sock_net(sk)->ns.count)) {
sock_gen_put(sk);
goto restart;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e9fed83e9b..5bd7599634 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,10 +81,7 @@ void __init inet_initpeers(void)
inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
- peer_cachep = kmem_cache_create("inet_peer_cache",
- sizeof(struct inet_peer),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
- NULL);
+ peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
/* Called with rcu_read_lock() or base->lock held */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb947d1613..08e2c92e25 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -580,7 +580,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &dist_min,
},
- { }
};
/* secret interval has been deprecated */
@@ -593,7 +592,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
@@ -632,7 +630,7 @@ err_alloc:
static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv4.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.frags_hdr);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1954a56fec..ba20547352 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -265,6 +265,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -272,12 +273,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
int ver;
int len;
+ ip_tunnel_flags_copy(flags, tpi->flags);
+
itn = net_generic(net, erspan_net_id);
iph = ip_hdr(skb);
if (is_erspan_type1(gre_hdr_len)) {
ver = 0;
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
} else {
if (unlikely(!pskb_may_pull(skb,
@@ -287,8 +290,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_KEY,
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, tpi->key);
}
@@ -312,10 +315,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags,
@@ -338,7 +340,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ERSPAN_V2_MDSIZE);
info = &tun_dst->u.tun_info;
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
}
@@ -381,10 +384,13 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
tnl_params = &tunnel->parms.iph;
if (tunnel->collect_md || tnl_params->daddr == 0) {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
__be64 tun_id;
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ ip_tunnel_flags_and(flags, tpi->flags, flags);
+
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
@@ -464,12 +470,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags = tunnel->parms.o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
flags, proto, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -483,10 +492,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
int tunnel_hlen;
- __be16 flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -500,14 +509,19 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
/* Push Tunnel header. */
- if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto err_free_skb;
- flags = tun_info->key.tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
+
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -521,6 +535,7 @@ err_free_skb:
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
@@ -536,7 +551,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
goto err_free_skb;
if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
@@ -589,8 +604,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
}
- gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -664,7 +680,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
@@ -706,7 +723,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 0) {
proto = htons(ETH_P_ERSPAN);
- tunnel->parms.o_flags &= ~TUNNEL_SEQ;
+ __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
} else if (tunnel->erspan_ver == 1) {
erspan_build_header(skb, ntohl(tunnel->parms.o_key),
tunnel->index,
@@ -721,7 +738,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
}
- tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
return NETDEV_TX_OK;
@@ -744,7 +761,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
if (skb_cow_head(skb, dev->needed_headroom))
@@ -762,7 +780,6 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -776,12 +793,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
dev->needed_headroom += len;
if (set_mtu)
- dev->mtu = max_t(int, dev->mtu - len, 68);
-
- flags = tunnel->parms.o_flags;
+ WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
- if (flags & TUNNEL_SEQ ||
- (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
+ (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
} else {
@@ -790,20 +806,29 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
}
}
-static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
+static int ipgre_tunnel_ctl(struct net_device *dev,
+ struct ip_tunnel_parm_kern *p,
int cmd)
{
+ __be16 i_flags, o_flags;
int err;
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ i_flags = ip_tunnel_flags_to_be16(p->i_flags);
+ o_flags = ip_tunnel_flags_to_be16(p->o_flags);
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
- ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
+ ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
- p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, o_flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
@@ -812,15 +837,18 @@ static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
if (cmd == SIOCCHGTUNNEL) {
struct ip_tunnel *t = netdev_priv(dev);
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
ipgre_link_update(dev, true);
}
- p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
- p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ ip_tunnel_flags_from_be16(p->i_flags, i_flags);
+ o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ ip_tunnel_flags_from_be16(p->o_flags, o_flags);
+
return 0;
}
@@ -960,7 +988,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
- __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -972,14 +999,13 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- flags = tunnel->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1030,14 +1056,16 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch = ipgre_exit_batch_net,
+ .exit_batch_rtnl = ipgre_exit_batch_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1134,7 +1162,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1150,10 +1178,12 @@ static int ipgre_netlink_parms(struct net_device *dev,
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1201,7 +1231,7 @@ static int ipgre_netlink_parms(struct net_device *dev,
static int erspan_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1360,7 +1390,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1378,7 +1408,7 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1397,8 +1427,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1413,8 +1443,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
ipgre_link_update(dev, !tb[IFLA_MTU]);
@@ -1426,8 +1456,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1442,8 +1472,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
return 0;
}
@@ -1499,8 +1529,10 @@ static size_t ipgre_get_size(const struct net_device *dev)
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ struct ip_tunnel_parm_kern *p = &t->parms;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -1548,7 +1580,7 @@ static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (t->erspan_ver <= 2) {
if (t->erspan_ver != 0 && !t->collect_md)
- t->parms.o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
goto nla_put_failure;
@@ -1702,14 +1734,16 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch = ipgre_tap_exit_batch_net,
+ .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1720,14 +1754,16 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
+ dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch = erspan_exit_batch_net,
+ .exit_batch_rtnl = erspan_exit_batch_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 5e9c815665..d6fbcbd235 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip_extract_route_hint(net, skb,
- ((struct rtable *)dst)->rt_type);
+ dst_rtable(dst)->rt_type);
/* dispatch old sublist */
if (!list_empty(&sublist))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 67d8466223..9500031a1f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
goto packet_routed;
/* Make sure we can route this packet. */
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
if (!rt) {
__be32 daddr;
@@ -493,7 +493,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_CONN_FLAGS_TOS(sk, tos),
+ RT_TOS(tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk,
bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
@@ -1473,7 +1473,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* by icmp_hdr(skb)->type.
*/
if (sk->sk_type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 21d2ffa919..cf377377b5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -894,7 +894,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
- int val = 0, err;
+ int val = 0, err, retv;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
switch (optname) {
@@ -938,8 +938,12 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
/* If optlen==0, it is equivalent to val == 0 */
- if (optname == IP_ROUTER_ALERT)
- return ip_ra_control(sk, val ? 1 : 0, NULL);
+ if (optname == IP_ROUTER_ALERT) {
+ retv = ip_ra_control(sk, val ? 1 : 0, NULL);
+ if (retv == 0)
+ inet_assign_bit(RTALERT, sk, val);
+ return retv;
+ }
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1575,6 +1579,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_BIND_ADDRESS_NO_PORT:
val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
goto copyval;
+ case IP_ROUTER_ALERT:
+ val = inet_test_bit(RTALERT, sk);
+ goto copyval;
case IP_TTL:
val = READ_ONCE(inet->uc_ttl);
if (val < 0)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 55039191b8..bccef2fcf6 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,17 +56,13 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
+ const unsigned long *flags, __be32 key)
{
- if (p->i_flags & TUNNEL_KEY) {
- if (flags & TUNNEL_KEY)
- return key == p->i_key;
- else
- /* key expected, none present */
- return false;
- } else
- return !(flags & TUNNEL_KEY);
+ if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
+ return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
+
+ return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
}
/* Fallback tunnel: no source, no destination, no key, no options
@@ -81,7 +77,7 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
Given src, dst and key, find appropriate for input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
+ int link, const unsigned long *flags,
__be32 remote, __be32 local,
__be32 key)
{
@@ -102,10 +98,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else
- cand = t;
+ cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -117,9 +112,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -137,22 +132,23 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
+ if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
+ t->parms.i_key != key) ||
t->parms.iph.saddr != 0 ||
t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -172,7 +168,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
unsigned int h;
__be32 remote;
@@ -183,7 +179,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
else
remote = 0;
- if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
+ test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
i_key = 0;
h = ip_tunnel_hash(i_key, remote);
@@ -207,21 +204,23 @@ static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
int type)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be32 key = parms->i_key;
- __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
+ ip_tunnel_flags_copy(flags, parms->i_flags);
+
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- link == t->parms.link &&
+ link == READ_ONCE(t->parms.link) &&
type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
@@ -231,7 +230,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
static struct net_device *__ip_tunnel_create(struct net *net,
const struct rtnl_link_ops *ops,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
int err;
struct ip_tunnel *tunnel;
@@ -327,7 +326,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
struct ip_tunnel *nt;
struct net_device *dev;
@@ -387,15 +386,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
}
#endif
- if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
- ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags&TUNNEL_SEQ) {
- if (!(tpi->flags&TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
@@ -544,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rt6_info *rt6;
__be32 daddr;
- rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+ rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
NULL;
daddr = md ? dst : tunnel->parms.iph.daddr;
@@ -639,7 +638,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
df = htons(IP_DF);
if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
key->u.ipv4.dst, true)) {
@@ -774,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos),
- dev_net(dev), tunnel->parms.link,
+ dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
@@ -872,7 +871,7 @@ EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
- struct ip_tunnel_parm *p,
+ struct ip_tunnel_parm_kern *p,
bool set_mtu,
__u32 fwmark)
{
@@ -894,17 +893,18 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
- t->parms.link = p->link;
+ WRITE_ONCE(t->parms.link, p->link);
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
- dev->mtu = mtu;
+ WRITE_ONCE(dev->mtu, mtu);
}
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
-int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
int err = 0;
struct ip_tunnel *t = netdev_priv(dev);
@@ -928,10 +928,10 @@ int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags & VTI_ISVTI)) {
- if (!(p->i_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
p->i_key = 0;
- if (!(p->o_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
p->o_key = 0;
}
@@ -1006,16 +1006,58 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
+ const void __user *data)
+{
+ struct ip_tunnel_parm p;
+
+ if (copy_from_user(&p, data, sizeof(p)))
+ return false;
+
+ strscpy(kp->name, p.name);
+ kp->link = p.link;
+ ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
+ ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
+ kp->i_key = p.i_key;
+ kp->o_key = p.o_key;
+ memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
+
+bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
+{
+ struct ip_tunnel_parm p;
+
+ if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(kp->o_flags))
+ return false;
+
+ memset(&p, 0, sizeof(p));
+
+ strscpy(p.name, kp->name);
+ p.link = kp->link;
+ p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
+ p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
+ p.i_key = kp->i_key;
+ p.o_key = kp->o_key;
+ memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
+
+ return !copy_to_user(data, &p, sizeof(p));
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
+
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
void __user *data, int cmd)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
int err;
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
- if (!err && copy_to_user(data, &p, sizeof(p)))
+ if (!err && !ip_tunnel_parm_to_user(data, &p))
return -EFAULT;
return err;
}
@@ -1040,7 +1082,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
new_mtu = max_mtu;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
@@ -1078,15 +1120,15 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
int ip_tunnel_get_iflink(const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->parms.link;
+ return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
@@ -1094,7 +1136,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
unsigned int i;
itn->rtnl_link_ops = ops;
@@ -1157,24 +1199,22 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill)
{
struct ip_tunnel_net *itn;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
+ ip_tunnel_destroy(net, itn, dev_to_kill, ops);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
@@ -1228,7 +1268,7 @@ err_register_netdevice:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 80ccd6661a..a3676155be 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags)
{
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
struct metadata_dst *res;
struct ip_tunnel_info *dst, *src;
@@ -144,10 +145,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
sizeof(struct in6_addr));
else
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
- dst->key.tun_flags = src->key.tun_flags;
+ ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags);
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
- src->options_len, 0);
+ src->options_len, tun_flags);
return res;
}
@@ -497,7 +498,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
opt->opt_class = nla_get_be16(attr);
attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE];
opt->type = nla_get_u8(attr);
- info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct geneve_opt) + data_len;
@@ -525,7 +526,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
md->gbp &= VXLAN_GBP_MASK;
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct vxlan_metadata);
@@ -574,7 +575,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
set_hwid(&md->u.md2, nla_get_u8(attr));
}
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct erspan_metadata);
@@ -585,7 +586,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
{
int err, rem, opt_len, opts_len = 0;
struct nlattr *nla;
- __be16 type = 0;
+ u32 type = 0;
if (!attr)
return 0;
@@ -598,7 +599,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
switch (nla_type(nla)) {
case LWTUNNEL_IP_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len,
extack);
@@ -607,7 +608,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
opts_len += opt_len;
if (opts_len > IP_TUNNEL_OPTS_MAX)
return -EINVAL;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_VXLAN:
if (type)
@@ -617,7 +618,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_ERSPAN:
if (type)
@@ -627,7 +628,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
default:
return -EINVAL;
@@ -705,10 +706,16 @@ static int ip_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP_TOS])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
- if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_from_be16(flags,
+ nla_get_be16(tb[LWTUNNEL_IP_FLAGS]));
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX;
tun_info->options_len = opt_len;
@@ -812,18 +819,18 @@ static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type,
struct nlattr *nest;
int err = 0;
- if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(tun_info->key.tun_flags))
return 0;
nest = nla_nest_start_noflag(skb, type);
if (!nest)
return -ENOMEM;
- if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_erspan(skb, tun_info);
if (err) {
@@ -846,7 +853,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
return -ENOMEM;
@@ -857,11 +865,11 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
{
int opt_len;
- if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(info->key.tun_flags))
return 0;
opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -874,10 +882,10 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
/* OPT_GENEVE_DATA */
offset += sizeof(*opt) + opt->length * 4;
}
- } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
- } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
struct erspan_metadata *md = ip_tunnel_info_opts(info);
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
@@ -984,10 +992,17 @@ static int ip6_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP6_TC])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
- if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP6_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+ __be16 data;
+
+ data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
+ ip_tunnel_flags_from_be16(flags, data);
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
tun_info->options_len = opt_len;
@@ -1008,7 +1023,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
return -ENOMEM;
@@ -1116,7 +1132,7 @@ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms);
void ip_tunnel_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
if (data[IFLA_IPTUN_LINK])
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
@@ -1139,8 +1155,12 @@ void ip_tunnel_netlink_parms(struct nlattr *data[],
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
- if (data[IFLA_IPTUN_FLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ if (data[IFLA_IPTUN_FLAGS]) {
+ __be16 flags;
+
+ flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ ip_tunnel_flags_from_be16(parms->i_flags, flags);
+ }
if (data[IFLA_IPTUN_PROTO])
parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d1d6bb28ed..14536da9f5 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -51,8 +51,11 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -167,7 +170,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parms = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
int pkt_len = skb->len;
@@ -322,8 +325,11 @@ static int vti4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
int protocol = iph->protocol;
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->daddr, iph->saddr, 0);
if (!tunnel)
return -1;
@@ -373,8 +379,9 @@ static int vti4_err(struct sk_buff *skb, u32 info)
}
static int
-vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
int err = 0;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
@@ -383,20 +390,26 @@ vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
return -EINVAL;
}
- if (!(p->i_flags & GRE_KEY))
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ if (!(ip_tunnel_flags_to_be16(p->i_flags) & GRE_KEY))
p->i_key = 0;
- if (!(p->o_flags & GRE_KEY))
+ if (!(ip_tunnel_flags_to_be16(p->o_flags) & GRE_KEY))
p->o_key = 0;
- p->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, flags);
+ ip_tunnel_flags_copy(p->i_flags, flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p->i_flags |= GRE_KEY;
- p->o_flags |= GRE_KEY;
+ ip_tunnel_flags_from_be16(flags, GRE_KEY);
+ ip_tunnel_flags_or(p->i_flags, p->i_flags, flags);
+ ip_tunnel_flags_or(p->o_flags, p->o_flags, flags);
}
return 0;
}
@@ -510,14 +523,16 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_batch_net(struct list_head *list_net)
+static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
+ dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch = vti_exit_batch_net,
+ .exit_batch_rtnl = vti_exit_batch_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -529,7 +544,7 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void vti_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -539,7 +554,7 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
- parms->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, parms->i_flags);
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -564,7 +579,7 @@ static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
__u32 fwmark = 0;
vti_netlink_parms(data, &parms, &fwmark);
@@ -576,8 +591,8 @@ static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
vti_netlink_parms(data, &p, &fwmark);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
@@ -604,7 +619,7 @@ static size_t vti_get_size(const struct net_device *dev)
static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
+ struct ip_tunnel_parm_kern *p = &t->parms;
if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) ||
nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 03afa3871e..923a2ef68c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -130,13 +130,16 @@ static int ipip_err(struct sk_buff *skb, u32 info)
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err = 0;
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->daddr, iph->saddr, 0);
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr,
+ iph->saddr, 0);
if (!t) {
err = -ENOENT;
goto out;
@@ -213,13 +216,16 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tun_dst = NULL;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->saddr, iph->daddr, 0);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr,
+ iph->daddr, 0);
if (tunnel) {
const struct tnl_ptk_info *tpi;
@@ -238,7 +244,9 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (tunnel->collect_md) {
- tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+ ip_tunnel_flags_zero(flags);
+
+ tun_dst = ip_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
return 0;
ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info);
@@ -330,7 +338,7 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
}
static int
-ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 ||
@@ -340,7 +348,8 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
p->i_key = p->o_key = 0;
- p->i_flags = p->o_flags = 0;
+ ip_tunnel_flags_zero(p->i_flags);
+ ip_tunnel_flags_zero(p->o_flags);
return ip_tunnel_ctl(dev, p, cmd);
}
@@ -405,8 +414,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms, bool *collect_md,
- __u32 *fwmark)
+ struct ip_tunnel_parm_kern *parms,
+ bool *collect_md, __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -432,8 +441,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
@@ -452,8 +461,8 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
bool collect_md;
__u32 fwmark = t->fwmark;
@@ -510,7 +519,7 @@ static size_t ipip_get_size(const struct net_device *dev)
static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
@@ -592,14 +601,16 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch = ipip_exit_batch_net,
+ .exit_batch_rtnl = ipip_exit_batch_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b53c36c473..6c750bd13d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -441,7 +441,7 @@ static bool ipmr_init_vif_indev(const struct net_device *dev)
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
struct net_device *tunnel_dev, *new_dev;
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
tunnel_dev = __dev_get_by_name(net, "tunl0");
@@ -2589,7 +2589,9 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
@@ -3141,10 +3143,7 @@ int __init ip_mr_init(void)
{
int err;
- mrt_cachep = kmem_cache_create("ip_mrt_cache",
- sizeof(struct mfc_cache),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
- NULL);
+ mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = register_pernet_subsys(&ipmr_net_ops);
if (err)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f71a7e9a7d..1b991b8895 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4
tristate
default n
+# old sockopt interface and eval loop
+config IP_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
@@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP_NF_MANGLE || IP_NF_RAW
+ depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -182,7 +187,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP_NF_FILTER
+ depends on IP_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV4
default m if NETFILTER_ADVANCED=n
help
@@ -212,6 +217,7 @@ config IP_NF_NAT
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
+ select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -252,6 +258,7 @@ endif # IP_NF_NAT
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -261,7 +268,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_ECN
tristate "ECN target support"
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `ECN' target, which can be used in the iptables mangle
@@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -299,6 +307,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -309,36 +318,36 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
- tristate "ARP tables support"
- select NETFILTER_XTABLES
- select NETFILTER_FAMILY_ARP
- depends on NETFILTER_ADVANCED
- help
- arptables is a general, extensible packet identification framework.
- The ARP packet filtering and mangling (manipulation)subsystems
- use this: say Y or M here if you want to use either of those.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate
-if IP_NF_ARPTABLES
+config NFT_COMPAT_ARP
+ tristate
+ depends on NF_TABLES_ARP && NFT_COMPAT
+ default m if NFT_COMPAT=m
+ default y if NFT_COMPAT=y
config IP_NF_ARPFILTER
- tristate "ARP packet filtering"
+ tristate "arptables-legacy packet filtering support"
+ select IP_NF_ARPTABLES
+ select NETFILTER_FAMILY_ARP
+ depends on NETFILTER_XTABLES
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
- local output. On a bridge, you can also specify filtering rules
- for forwarded ARP packets. See the man page for arptables(8).
+ local output. This is only needed for arptables-legacy(8).
+ Neither arptables-nft nor nftables need this to work.
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_ARP_MANGLE
tristate "ARP payload mangling"
+ depends on IP_NF_ARPTABLES || NFT_COMPAT_ARP
help
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
-endif # IP_NF_ARPTABLES
+ This option is needed by both arptables-legacy and arptables-nft.
+ It is not used by nftables.
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5a26f9de1a..85502d4dfb 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# generic IP tables
-obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index b9062f4552..3ab908b747 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -44,7 +44,7 @@ static int iptable_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ipt_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ipt_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 4d42d0756f..a5db7c67d6 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = {
static int __init iptable_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv4_table,
- iptable_nat_table_init);
+ int ret;
+ /* net->gen->ptr[iptable_nat_net_id] must be allocated
+ * before calling iptable_nat_table_init().
+ */
+ ret = register_pernet_subsys(&iptable_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&iptable_nat_net_ops);
- if (ret < 0) {
- xt_unregister_template(&nf_nat_ipv4_table);
- return ret;
- }
+ ret = xt_register_template(&nf_nat_ipv4_table,
+ iptable_nat_table_init);
+ if (ret < 0)
+ unregister_pernet_subsys(&iptable_nat_net_ops);
return ret;
}
static void __exit iptable_nat_exit(void)
{
- unregister_pernet_subsys(&iptable_nat_net_ops);
xt_unregister_template(&nf_nat_ipv4_table);
+ unregister_pernet_subsys(&iptable_nat_net_ops);
}
module_init(iptable_nat_init);
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 69e3317996..73e66a088e 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
+ if (!indev)
+ return daddr;
in_dev_for_each_ifa_rcu(ifa, indev) {
if (ifa->ifa_flags & IFA_F_SECONDARY)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bbff68b5b5..6b9787ee86 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -26,6 +26,9 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \
+ NHA_OP_FLAG_DUMP_HW_STATS)
+
static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ID] = { .type = NLA_U32 },
[NHA_GROUP] = { .type = NLA_BINARY },
@@ -37,10 +40,17 @@ static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ENCAP] = { .type = NLA_NESTED },
[NHA_FDB] = { .type = NLA_FLAG },
[NHA_RES_GROUP] = { .type = NLA_NESTED },
+ [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true),
};
static const struct nla_policy rtm_nh_policy_get[] = {
[NHA_ID] = { .type = NLA_U32 },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
+};
+
+static const struct nla_policy rtm_nh_policy_del[] = {
+ [NHA_ID] = { .type = NLA_U32 },
};
static const struct nla_policy rtm_nh_policy_dump[] = {
@@ -48,6 +58,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
[NHA_FDB] = { .type = NLA_FLAG },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
};
static const struct nla_policy rtm_nh_res_policy_new[] = {
@@ -92,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
+ nh_info->id = nhi->nh_parent->id;
nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
@@ -131,13 +144,13 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
info->nh_grp->num_nh = num_nh;
info->nh_grp->is_fdb = nhg->fdb_nh;
+ info->nh_grp->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
nhi = rtnl_dereference(nhge->nh->nh_info);
- info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhi);
@@ -162,6 +175,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
return -ENOMEM;
info->nh_res_table->num_nh_buckets = num_nh_buckets;
+ info->nh_res_table->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh_buckets; i++) {
struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
@@ -393,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
struct nh_notifier_info info = {
.net = net,
.extack = extack,
+ .id = nh->id,
};
struct nh_group *nhg;
int err;
@@ -474,6 +489,7 @@ static void nexthop_free_group(struct nexthop *nh)
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
WARN_ON(!list_empty(&nhge->nh_list));
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
}
@@ -654,8 +670,204 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge)
+{
+ struct nh_grp_entry_stats *cpu_stats;
+
+ cpu_stats = get_cpu_ptr(nhge->stats);
+ u64_stats_update_begin(&cpu_stats->syncp);
+ u64_stats_inc(&cpu_stats->packets);
+ u64_stats_update_end(&cpu_stats->syncp);
+ put_cpu_ptr(cpu_stats);
+}
+
+static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge,
+ u64 *ret_packets)
{
+ int i;
+
+ *ret_packets = 0;
+
+ for_each_possible_cpu(i) {
+ struct nh_grp_entry_stats *cpu_stats;
+ unsigned int start;
+ u64 packets;
+
+ cpu_stats = per_cpu_ptr(nhge->stats, i);
+ do {
+ start = u64_stats_fetch_begin(&cpu_stats->syncp);
+ packets = u64_stats_read(&cpu_stats->packets);
+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+ *ret_packets += packets;
+ }
+}
+
+static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info,
+ const struct nexthop *nh)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ info->id = nh->id;
+ info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS;
+ info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats,
+ stats, nhg->num_nh),
+ GFP_KERNEL);
+ if (!info->nh_grp_hw_stats)
+ return -ENOMEM;
+
+ info->nh_grp_hw_stats->num_nh = nhg->num_nh;
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ info->nh_grp_hw_stats->stats[i].id = nhge->nh->id;
+ }
+
+ return 0;
+}
+
+static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info)
+{
+ kfree(info->nh_grp_hw_stats);
+}
+
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets)
+{
+ info->hw_stats_used = true;
+ info->stats[nh_idx].packets += delta_packets;
+}
+EXPORT_SYMBOL(nh_grp_hw_stats_report_delta);
+
+static void nh_grp_hw_stats_apply_update(struct nexthop *nh,
+ struct nh_notifier_info *info)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets;
+ }
+}
+
+static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
+{
+ struct nh_notifier_info info = {
+ .net = nh->net,
+ };
+ struct net *net = nh->net;
+ int err;
+
+ if (nexthop_notifiers_is_empty(net)) {
+ *hw_stats_used = false;
+ return 0;
+ }
+
+ err = nh_notifier_grp_hw_stats_init(&info, nh);
+ if (err)
+ return err;
+
+ err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
+ &info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ nh_grp_hw_stats_apply_update(nh, &info);
+ *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used;
+
+ nh_notifier_grp_hw_stats_fini(&info);
+ return notifier_to_errno(err);
+}
+
+static int nla_put_nh_group_stats_entry(struct sk_buff *skb,
+ struct nh_grp_entry *nhge,
+ u32 op_flags)
+{
+ struct nlattr *nest;
+ u64 packets;
+
+ nh_grp_entry_stats_read(nhge, &packets);
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) ||
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS,
+ packets + nhge->packets_hw))
+ goto nla_put_failure;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+ nhge->packets_hw))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nlattr *nest;
+ bool hw_stats_used;
+ int err;
+ int i;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats))
+ goto err_out;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nhg->hw_stats) {
+ err = nh_grp_hw_stats_update(nh, &hw_stats_used);
+ if (err)
+ goto out;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used))
+ goto err_out;
+ }
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS);
+ if (!nest)
+ goto err_out;
+
+ for (i = 0; i < nhg->num_nh; i++)
+ if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i],
+ op_flags))
+ goto cancel_out;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+cancel_out:
+ nla_nest_cancel(skb, nest);
+err_out:
+ err = -EMSGSIZE;
+out:
+ return err;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
@@ -676,14 +888,20 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
- p->id = nhg->nh_entries[i].nh->id;
- p->weight = nhg->nh_entries[i].weight - 1;
- p += 1;
+ *p++ = (struct nexthop_grp) {
+ .id = nhg->nh_entries[i].nh->id,
+ .weight = nhg->nh_entries[i].weight - 1,
+ };
}
if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
goto nla_put_failure;
+ if (op_flags & NHA_OP_FLAG_DUMP_STATS &&
+ (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) ||
+ nla_put_nh_group_stats(skb, nh, op_flags)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -691,7 +909,8 @@ nla_put_failure:
}
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
- int event, u32 portid, u32 seq, unsigned int nlflags)
+ int event, u32 portid, u32 seq, unsigned int nlflags,
+ u32 op_flags)
{
struct fib6_nh *fib6_nh;
struct fib_nh *fib_nh;
@@ -718,7 +937,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nhg))
+ if (nla_put_nh_group(skb, nh, op_flags))
goto nla_put_failure;
goto out;
}
@@ -849,7 +1068,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
if (!skb)
goto errout;
- err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+ err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -1104,6 +1323,7 @@ static int nh_check_attr_group(struct net *net,
if (!tb[i])
continue;
switch (i) {
+ case NHA_HW_STATS_ENABLE:
case NHA_FDB:
continue;
case NHA_RES_GROUP:
@@ -1176,6 +1396,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1185,7 +1406,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
+ struct nh_grp_entry *nhge0 = NULL;
int i;
if (nhg->fdb_nh)
@@ -1200,16 +1421,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
if (!nexthop_is_good_nh(nhge->nh))
continue;
- if (!rc)
- rc = nhge->nh;
+ if (!nhge0)
+ nhge0 = nhge;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
- return rc ? : nhg->nh_entries[0].nh;
+ if (!nhge0)
+ nhge0 = &nhg->nh_entries[0];
+ nh_grp_entry_stats_inc(nhge0);
+ return nhge0->nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -1225,6 +1450,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
bucket = &res_table->nh_buckets[bucket_index];
nh_res_bucket_set_busy(bucket);
nhge = rcu_dereference(bucket->nh_entry);
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1798,6 +2024,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = true;
list_del(&nhges[i].nh_list);
+ new_nhges[j].stats = nhges[i].stats;
new_nhges[j].nh_parent = nhges[i].nh_parent;
new_nhges[j].nh = nhges[i].nh;
new_nhges[j].weight = nhges[i].weight;
@@ -1813,6 +2040,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
rcu_assign_pointer(nhp->nh_grp, newg);
list_del(&nhge->nh_list);
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
/* Removal of a NH from a resilient group is notified through
@@ -2477,6 +2705,13 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (nhi->family == AF_INET)
nhg->has_v4 = true;
+ nhg->nh_entries[i].stats =
+ netdev_alloc_pcpu_stats(struct nh_grp_entry_stats);
+ if (!nhg->nh_entries[i].stats) {
+ err = -ENOMEM;
+ nexthop_put(nhe);
+ goto out_no_nh;
+ }
nhg->nh_entries[i].nh = nhe;
nhg->nh_entries[i].weight = entry[i].weight + 1;
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
@@ -2509,6 +2744,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (cfg->nh_fdb)
nhg->fdb_nh = 1;
+ if (cfg->nh_hw_stats)
+ nhg->hw_stats = true;
+
rcu_assign_pointer(nh->nh_grp, nhg);
return nh;
@@ -2516,6 +2754,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
out_no_nh:
for (i--; i >= 0; --i) {
list_del(&nhg->nh_entries[i].nh_list);
+ free_percpu(nhg->nh_entries[i].stats);
nexthop_put(nhg->nh_entries[i].nh);
}
@@ -2850,6 +3089,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
cfg, extack);
+ if (tb[NHA_HW_STATS_ENABLE])
+ cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]);
+
/* no other attributes should be set */
goto out;
}
@@ -2941,6 +3183,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
+ if (tb[NHA_HW_STATS_ENABLE]) {
+ NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops");
+ goto out;
+ }
err = 0;
out:
@@ -2966,9 +3212,9 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
-static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
- struct nlattr **tb, u32 *id,
- struct netlink_ext_ack *extack)
+static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u32 *id, u32 *op_flags,
+ struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
@@ -2988,28 +3234,21 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- return 0;
-}
-
-static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
- int err;
-
- err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
- ARRAY_SIZE(rtm_nh_policy_get) - 1,
- rtm_nh_policy_get, extack);
- if (err < 0)
- return err;
+ if (op_flags) {
+ if (tb[NHA_OP_FLAGS])
+ *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+ else
+ *op_flags = 0;
+ }
- return __nh_valid_get_del_req(nlh, tb, id, extack);
+ return 0;
}
/* rtnl */
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)];
struct net *net = sock_net(skb->sk);
struct nl_info nlinfo = {
.nlh = nlh,
@@ -3020,7 +3259,13 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack);
if (err)
return err;
@@ -3037,13 +3282,21 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
struct net *net = sock_net(in_skb->sk);
struct sk_buff *skb = NULL;
struct nexthop *nh;
+ u32 op_flags;
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack);
if (err)
return err;
@@ -3058,7 +3311,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_free;
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ nlh->nlmsg_seq, 0, op_flags);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
goto errout_free;
@@ -3079,6 +3332,7 @@ struct nh_dump_filter {
bool group_filter;
bool fdb_filter;
u32 res_bucket_nh_id;
+ u32 op_flags;
};
static bool nh_dump_filtered(struct nexthop *nh,
@@ -3166,6 +3420,11 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[NHA_OP_FLAGS])
+ filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+ else
+ filter->op_flags = 0;
+
return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
}
@@ -3223,7 +3482,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags);
}
/* rtnl */
@@ -3241,10 +3500,6 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
&rtm_dump_nexthop_cb, &filter);
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3439,11 +3694,6 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
&rtm_dump_nexthop_bucket_cb, &dd);
}
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
-
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3483,7 +3733,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
- err = __nh_valid_get_del_req(nlh, tb, id, extack);
+ err = nh_valid_get_del_req(nlh, tb, id, NULL, extack);
if (err)
return err;
@@ -3631,17 +3881,24 @@ unlock:
}
EXPORT_SYMBOL(register_nexthop_notifier);
-int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
nb);
- if (err)
- goto unlock;
- nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
-unlock:
+ if (!err)
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(__unregister_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = __unregister_nexthop_notifier(net, nb);
rtnl_unlock();
return err;
}
@@ -3737,16 +3994,20 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
+static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
+ ASSERT_RTNL();
+ list_for_each_entry(net, net_list, exit_list)
flush_all_nexthops(net);
- kfree(net->nexthop.devhash);
- }
- rtnl_unlock();
+}
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ kfree(net->nexthop.devhash);
+ net->nexthop.devhash = NULL;
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3764,7 +4025,8 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit_batch = nexthop_net_exit_batch,
+ .exit = nexthop_net_exit,
+ .exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
static int __init nexthop_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5f4654ebff..6c4664c681 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -33,6 +33,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/bottom_half.h>
@@ -395,7 +396,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
+ IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2,
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 288f1846b3..4cb43401e0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -175,6 +175,13 @@ static int raw_v4_input(struct net *net, struct sk_buff *skb,
if (!raw_v4_match(net, sk, iph->protocol,
iph->saddr, iph->daddr, dif, sdif))
continue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >=
+ READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_inc(&sk->sk_drops);
+ continue;
+ }
+
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
@@ -310,7 +317,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
raw_rcv_skb(sk, skb);
return 0;
@@ -605,6 +612,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
+ fl4.fl4_icmp_type = 0;
+ fl4.fl4_icmp_code = 0;
+
if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -816,7 +826,7 @@ static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *o
out: return ret;
}
-static int do_raw_setsockopt(struct sock *sk, int level, int optname,
+static int do_raw_setsockopt(struct sock *sk, int optname,
sockptr_t optval, unsigned int optlen)
{
if (optname == ICMP_FILTER) {
@@ -833,11 +843,11 @@ static int raw_setsockopt(struct sock *sk, int level, int optname,
{
if (level != SOL_RAW)
return ip_setsockopt(sk, level, optname, optval, optlen);
- return do_raw_setsockopt(sk, level, optname, optval, optlen);
+ return do_raw_setsockopt(sk, optname, optval, optlen);
}
-static int do_raw_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+static int do_raw_getsockopt(struct sock *sk, int optname,
+ char __user *optval, int __user *optlen)
{
if (optname == ICMP_FILTER) {
if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
@@ -853,7 +863,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
{
if (level != SOL_RAW)
return ip_getsockopt(sk, level, optname, optval, optlen);
- return do_raw_getsockopt(sk, level, optname, optval, optlen);
+ return do_raw_getsockopt(sk, optname, optval, optlen);
}
static int raw_ioctl(struct sock *sk, int cmd, int *karg)
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index fe2140c837..cc793bd8de 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -213,6 +213,7 @@ static int raw_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler raw_diag_handler = {
+ .owner = THIS_MODULE,
.dump = raw_diag_dump,
.dump_one = raw_diag_dump_one,
.idiag_get_info = raw_diag_get_info,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f67d3d6fe9..990912fa18 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -106,9 +106,6 @@
#include "fib_lookup.h"
-#define RT_FL_TOS(oldflp4) \
- ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-
#define RT_GC_TIMEOUT (300*HZ)
#define DEFAULT_MIN_PMTU (512 + 20 + 20)
@@ -132,7 +129,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ipv4_mtu(const struct dst_entry *dst);
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
@@ -498,15 +496,6 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void ip_rt_fix_tos(struct flowi4 *fl4)
-{
- __u8 tos = RT_FL_TOS(fl4);
-
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- if (tos & RTO_ONLINK)
- fl4->flowi4_scope = RT_SCOPE_LINK;
-}
-
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk, const struct iphdr *iph,
int oif, __u8 tos, u8 prot, u32 mark,
@@ -831,28 +820,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
u32 mark = skb->mark;
__u8 tos = iph->tos;
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+static void ipv4_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
- struct dst_entry *ret = dst;
+ struct rtable *rt = dst_rtable(dst);
- if (rt) {
- if (dst->obsolete > 0) {
- ip_rt_put(rt);
- ret = NULL;
- } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
- rt->dst.expires) {
- ip_rt_put(rt);
- ret = NULL;
- }
- }
- return ret;
+ if ((dst->obsolete > 0) ||
+ (rt->rt_flags & RTCF_REDIRECTED) ||
+ rt->dst.expires)
+ sk_dst_reset(sk);
}
/*
@@ -1056,7 +1038,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
@@ -1127,7 +1109,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = (struct rtable *)odst;
+ rt = dst_rtable(odst);
if (odst->obsolete && !odst->ops->check(odst, 0)) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
@@ -1136,7 +1118,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
+ __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -1193,7 +1175,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
u32 cookie)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
/* All IPV4 dsts are created with ->obsolete set to the value
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
@@ -1281,7 +1263,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_tos = iph->tos & IPTOS_RT_MASK,
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1528,10 +1510,8 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
-
ip_dst_metrics_put(dst);
- rt_del_uncached_list(rt);
+ rt_del_uncached_list(dst_rtable(dst));
}
void rt_flush_dev(struct net_device *dev)
@@ -2314,7 +2294,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (IN_DEV_BFORWARD(in_dev))
goto make_route;
/* not do cache if bc_forwarding is enabled */
- if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING))
do_cache = false;
goto brd_input;
}
@@ -2639,7 +2619,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- ip_rt_fix_tos(fl4);
+ fl4->flowi4_tos &= IPTOS_RT_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -2832,7 +2812,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rtable *ort = (struct rtable *) dst_orig;
+ struct rtable *ort = dst_rtable(dst_orig);
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
@@ -2877,9 +2857,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (flp4->flowi4_proto) {
flp4->flowi4_oif = rt->dst.dev->ifindex;
- rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = dst_rtable(xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0));
}
return rt;
@@ -2888,9 +2868,9 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
- struct rtable *rt, u32 table_id, struct flowi4 *fl4,
- struct sk_buff *skb, u32 portid, u32 seq,
- unsigned int flags)
+ struct rtable *rt, u32 table_id, dscp_t dscp,
+ struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
+ u32 seq, unsigned int flags)
{
struct rtmsg *r;
struct nlmsghdr *nlh;
@@ -2906,7 +2886,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
- r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
+ r->rtm_tos = inet_dscp_to_dsfield(dscp);
r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure;
@@ -2994,7 +2974,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
#ifdef CONFIG_IP_MROUTE
if (ipv4_is_multicast(dst) &&
!ipv4_is_local_multicast(dst) &&
- IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+ IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
r, portid);
@@ -3056,7 +3036,7 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
goto next;
err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
- table_id, NULL, skb,
+ table_id, 0, NULL, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags);
if (err)
@@ -3352,7 +3332,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
+ fri.dscp = res.dscp;
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3379,8 +3359,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
} else {
- err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
- NETLINK_CB(in_skb).portid,
+ err = rt_fill_info(net, dst, src, rt, table_id, res.dscp, &fl4,
+ skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
}
if (err < 0)
@@ -3510,7 +3490,6 @@ static struct ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static const char ipv4_route_flush_procname[] = "flush";
@@ -3544,7 +3523,6 @@ static struct ctl_table ipv4_route_netns_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
static __net_init int sysctl_route_net_init(struct net *net)
@@ -3562,16 +3540,14 @@ static __net_init int sysctl_route_net_init(struct net *net)
/* Don't export non-whitelisted sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
- if (tbl[0].procname != ipv4_route_flush_procname) {
- tbl[0].procname = NULL;
+ if (tbl[0].procname != ipv4_route_flush_procname)
table_size = 0;
- }
}
/* Update the variables to point into the current struct net
* except for the first element flush
*/
- for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++)
+ for (i = 1; i < table_size; i++)
tbl[i].data += (void *)net - (void *)&init_net;
}
tbl[0].extra1 = net;
@@ -3591,7 +3567,7 @@ err_dup:
static __net_exit void sysctl_route_net_exit(struct net *net)
{
- struct ctl_table *tbl;
+ const struct ctl_table *tbl;
tbl = net->ipv4.route_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.route_hdr);
@@ -3694,9 +3670,8 @@ int __init ip_rt_init(void)
panic("IP: failed to allocate ip_rt_acct\n");
#endif
- ipv4_dst_ops.kmem_cachep =
- kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC);
ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 61f1c96cfe..b61d36810f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -51,15 +51,6 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
count, &syncookie_secret[c]);
}
-/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
-static u64 tcp_ns_to_ts(bool usec_ts, u64 val)
-{
- if (usec_ts)
- return div_u64(val, NSEC_PER_USEC);
-
- return div_u64(val, NSEC_PER_MSEC);
-}
-
/*
* when syncookies are in effect and tcp timestamps are enabled we encode
* tcp options in the lower bits of the timestamp value that will be
@@ -304,6 +295,24 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
return 0;
}
+#if IS_ENABLED(CONFIG_BPF)
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct request_sock *req = inet_reqsk(skb->sk);
+
+ skb->sk = NULL;
+ skb->destructor = NULL;
+
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ req = NULL;
+ }
+
+ return req;
+}
+EXPORT_SYMBOL_GPL(cookie_bpf_check);
+#endif
+
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
struct sock *sk, struct sk_buff *skb,
struct tcp_options_received *tcp_opt,
@@ -399,16 +408,23 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct rtable *rt;
__u8 rcv_wscale;
int full_space;
+ SKB_DR(reason);
if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
- if (!req)
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
+ if (!req) {
+ SKB_DR_SET(reason, NO_SOCKET);
goto out_drop;
+ }
ireq = inet_rsk(req);
@@ -420,8 +436,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
*/
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
- if (security_inet_conn_request(sk, skb, req))
+ if (security_inet_conn_request(sk, skb, req)) {
+ SKB_DR_SET(reason, SECURITY_HOOK);
goto out_free;
+ }
tcp_ao_syncookie(sk, skb, req, AF_INET);
@@ -438,11 +456,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
+ if (IS_ERR(rt)) {
+ SKB_DR_SET(reason, IP_OUTNOROUTES);
goto out_free;
+ }
/* Try to redo what tcp_v4_send_synack did. */
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
+ dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
@@ -454,19 +475,27 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ /* req->syncookie is set true only if ACK is validated
+ * by BPF kfunc, then, rcv_wscale is already configured.
+ */
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
- if (ret)
- inet_sk(ret)->cork.fl.u.ip4 = fl4;
+ if (!ret) {
+ SKB_DR_SET(reason, NO_SOCKET);
+ goto out_drop;
+ }
+ inet_sk(ret)->cork.fl.u.ip4 = fl4;
out:
return ret;
out_free:
reqsk_free(req);
out_drop:
+ kfree_skb_reason(skb, reason);
return NULL;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e4f16a7dc..162a0a3b6b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -575,7 +575,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
- { }
};
static struct ctl_table ipv4_net_table[] = {
@@ -1502,11 +1501,11 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ONE,
},
- { }
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv4_net_table);
struct ctl_table *table;
table = ipv4_net_table;
@@ -1517,7 +1516,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ for (i = 0; i < table_size; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
@@ -1533,7 +1532,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
- ARRAY_SIZE(ipv4_net_table));
+ table_size);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
@@ -1554,7 +1553,7 @@ err_alloc:
static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
kfree(net->ipv4.sysctl_local_reserved_ports);
table = net->ipv4.ipv4_hdr->ctl_table_arg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5887eac87b..ec69110341 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,13 +272,17 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/hotdata.h>
+#include <net/rps.h>
/* Track pending CMSGs. */
enum {
@@ -289,6 +293,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
+DEFINE_PER_CPU(u32, tcp_tw_isn);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
+
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);
@@ -591,7 +598,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
mask |= EPOLLOUT | EPOLLWRNORM;
}
- /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */
smp_rmb();
if (READ_ONCE(sk->sk_err) ||
!skb_queue_empty_lockless(&sk->sk_error_queue))
@@ -974,7 +981,7 @@ int tcp_wmem_schedule(struct sock *sk, int copy)
* Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
* to guarantee some progress.
*/
- left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
+ left = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]) - sk->sk_wmem_queued;
if (left > 0)
sk_forced_mem_schedule(sk, min(left, copy));
return min(copy, sk->sk_forward_alloc);
@@ -1158,6 +1165,9 @@ new_segment:
process_backlog++;
+#ifdef CONFIG_SKB_DECRYPTED
+ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
tcp_skb_entail(sk, skb);
copy = size_goal;
@@ -1183,7 +1193,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1415,8 +1425,6 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
struct sk_buff *skb;
int copied = 0, err = 0;
- /* XXX -- need to support SO_PEEK_OFF */
-
skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
@@ -1720,7 +1728,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
space = tcp_space_from_win(sk, val);
if (space > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, space);
- tcp_sk(sk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
}
return 0;
}
@@ -2327,6 +2335,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int target; /* Read at least this many bytes */
long timeo;
struct sk_buff *skb, *last;
+ u32 peek_offset = 0;
u32 urg_hole = 0;
err = -ENOTCONN;
@@ -2360,7 +2369,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
seq = &tp->copied_seq;
if (flags & MSG_PEEK) {
- peek_seq = tp->copied_seq;
+ peek_offset = max(sk_peek_offset(sk, flags), 0);
+ peek_seq = tp->copied_seq + peek_offset;
seq = &peek_seq;
}
@@ -2463,11 +2473,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
}
if ((flags & MSG_PEEK) &&
- (peek_seq - copied - urg_hole != tp->copied_seq)) {
+ (peek_seq - peek_offset - copied - urg_hole != tp->copied_seq)) {
net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
current->comm,
task_pid_nr(current));
- peek_seq = tp->copied_seq;
+ peek_seq = tp->copied_seq + peek_offset;
}
continue;
@@ -2508,7 +2518,10 @@ found_ok_skb:
WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
-
+ if (flags & MSG_PEEK)
+ sk_peek_offset_fwd(sk, used);
+ else
+ sk_peek_offset_bwd(sk, used);
tcp_rcv_space_adjust(sk);
skip_copy:
@@ -2636,6 +2649,10 @@ void tcp_set_state(struct sock *sk, int state)
if (oldstate != TCP_ESTABLISHED)
TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
break;
+ case TCP_CLOSE_WAIT:
+ if (oldstate == TCP_SYN_RECV)
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+ break;
case TCP_CLOSE:
if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
@@ -2647,7 +2664,7 @@ void tcp_set_state(struct sock *sk, int state)
inet_put_port(sk);
fallthrough;
default:
- if (oldstate == TCP_ESTABLISHED)
+ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
}
@@ -2709,7 +2726,7 @@ void tcp_shutdown(struct sock *sk, int how)
/* If we've already sent a FIN, or it's a closed state, skip this. */
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_SENT |
- TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
+ TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
tcp_send_fin(sk);
@@ -2743,7 +2760,15 @@ static bool tcp_too_many_orphans(int shift)
READ_ONCE(sysctl_tcp_max_orphans);
}
-bool tcp_check_oom(struct sock *sk, int shift)
+static bool tcp_out_of_memory(const struct sock *sk)
+{
+ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+ sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
+ return true;
+ return false;
+}
+
+bool tcp_check_oom(const struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
@@ -2804,7 +2829,8 @@ void __tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, sk->sk_allocation);
+ tcp_send_active_reset(sk, sk->sk_allocation,
+ SK_RST_REASON_NOT_SPECIFIED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2818,7 +2844,7 @@ void __tcp_close(struct sock *sk, long timeout)
* machine. State transitions:
*
* TCP_ESTABLISHED -> TCP_FIN_WAIT1
- * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
+ * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult)
* TCP_CLOSE_WAIT -> TCP_LAST_ACK
*
* are legal only when FIN has been sent (i.e. in window),
@@ -2878,7 +2904,8 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2896,7 +2923,8 @@ adjudge_to_death:
if (sk->sk_state != TCP_CLOSE) {
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3000,7 +3028,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any());
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -3009,6 +3037,7 @@ int tcp_disconnect(struct sock *sk, int flags)
__skb_queue_purge(&sk->sk_receive_queue);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
WRITE_ONCE(tp->urg_data, 0);
+ sk_set_peek_off(sk, -1);
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -3378,7 +3407,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (!val) {
if (sk->sk_state != TCP_CLOSE)
return -EINVAL;
- tp->window_clamp = 0;
+ WRITE_ONCE(tp->window_clamp, 0);
} else {
u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
@@ -3387,7 +3416,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (new_window_clamp == old_window_clamp)
return 0;
- tp->window_clamp = new_window_clamp;
+ WRITE_ONCE(tp->window_clamp, new_window_clamp);
if (new_window_clamp < old_window_clamp) {
/* need to apply the reserved mem provisioning only
* when shrinking the window clamp
@@ -4056,7 +4085,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
- val = tp->window_clamp;
+ val = READ_ONCE(tp->window_clamp);
break;
case TCP_INFO: {
struct tcp_info info;
@@ -4341,6 +4370,9 @@ zerocopy_rcv_out:
return err;
}
+ case TCP_IS_MPTCP:
+ val = 0;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -4551,13 +4583,10 @@ int tcp_abort(struct sock *sk, int err)
bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
- WRITE_ONCE(sk->sk_err, err);
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
- sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
+ tcp_done_with_error(sk, err);
}
bh_unlock_sock(sk);
@@ -4647,16 +4676,16 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 89);
/* TXRX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
@@ -4669,7 +4698,11 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* 32bit arches with 8byte alignment on u64 fields might need padding
+ * before tcp_clock_cache.
+ */
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 254d6e3f93..9171e10668 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -266,32 +266,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head)
kfree_sensitive(key);
}
-void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+static void tcp_ao_info_free_rcu(struct rcu_head *head)
{
- struct tcp_ao_info *ao;
+ struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu);
struct tcp_ao_key *key;
struct hlist_node *n;
+ hlist_for_each_entry_safe(key, n, &ao->head, node) {
+ hlist_del(&key->node);
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ kfree_sensitive(key);
+ }
+ kfree(ao);
+ static_branch_slow_dec_deferred(&tcp_ao_needed);
+}
+
+static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao)
+{
+ size_t total_ao_sk_mem = 0;
+ struct tcp_ao_key *key;
+
+ hlist_for_each_entry(key, &ao->head, node)
+ total_ao_sk_mem += tcp_ao_sizeof_key(key);
+ atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc);
+}
+
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+ struct tcp_ao_info *ao;
+
if (twsk) {
ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1);
- tcp_twsk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL);
} else {
ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1);
- tcp_sk(sk)->ao_info = NULL;
+ rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL);
}
if (!ao || !refcount_dec_and_test(&ao->refcnt))
return;
- hlist_for_each_entry_safe(key, n, &ao->head, node) {
- hlist_del_rcu(&key->node);
- if (!twsk)
- atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
- call_rcu(&key->rcu, tcp_ao_key_free_rcu);
- }
-
- kfree_rcu(ao, rcu);
- static_branch_slow_dec_deferred(&tcp_ao_needed);
+ if (!twsk)
+ tcp_ao_sk_omem_free(sk, ao);
+ call_rcu(&ao->rcu, tcp_ao_info_free_rcu);
}
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp)
@@ -509,9 +526,9 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp,
bool exclude_options, u8 *hash,
int hash_offset, int hash_len)
{
- int err, len = th->doff << 2;
struct scatterlist sg;
u8 *hdr = hp->scratch;
+ int err, len;
/* We are not allowed to change tcphdr, make a local copy */
if (exclude_options) {
@@ -933,6 +950,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
struct tcp_ao_key *key;
__be32 sisn, disn;
u8 *traffic_key;
+ int state;
u32 sne = 0;
info = rcu_dereference(tcp_sk(sk)->ao_info);
@@ -948,8 +966,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
disn = 0;
}
+ state = READ_ONCE(sk->sk_state);
/* Fast-path */
- if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
+ if (likely((1 << state) & TCP_AO_ESTABLISHED)) {
enum skb_drop_reason err;
struct tcp_ao_key *current_key;
@@ -988,6 +1007,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
return SKB_NOT_DROPPED_YET;
}
+ if (unlikely(state == TCP_CLOSE))
+ return SKB_DROP_REASON_TCP_CLOSE;
+
/* Lookup key based on peer address and keyid.
* current_key and rnext_key must not be used on tcp listen
* sockets as otherwise:
@@ -1001,7 +1023,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
if (th->syn && !th->ack)
goto verify_hash;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
+ if ((1 << state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
/* Make the initial syn the likely case here */
if (unlikely(req)) {
sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn,
@@ -1018,14 +1040,14 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
/* no way to figure out initial sisn/disn - drop */
return SKB_DROP_REASON_TCP_FLAGS;
}
- } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ } else if ((1 << state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
disn = info->lisn;
if (th->syn || th->rst)
sisn = th->seq;
else
sisn = info->risn;
} else {
- WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state);
+ WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", state);
return SKB_DROP_REASON_TCP_AOFAILURE;
}
verify_hash:
@@ -1963,8 +1985,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
first = true;
}
- if (cmd.ao_required && tcp_ao_required_verify(sk))
- return -EKEYREJECTED;
+ if (cmd.ao_required && tcp_ao_required_verify(sk)) {
+ err = -EKEYREJECTED;
+ goto out;
+ }
/* For sockets in TCP_CLOSED it's possible set keys that aren't
* matching the future peer (address/port/VRF/etc),
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 22358032dd..760941e551 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1024,7 +1024,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_update_gains(sk);
}
-__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
@@ -1155,9 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
-BTF_SET8_START(tcp_bbr_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
@@ -1166,9 +1164,7 @@ BTF_ID_FLAGS(func, bbr_cwnd_event)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
-#endif
-#endif
-BTF_SET8_END(tcp_bbr_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1b34050a75..28ffcfbeef 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -146,11 +146,7 @@ EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
{
struct tcp_congestion_ops *existing;
- int ret;
-
- ret = tcp_validate_congestion_control(ca);
- if (ret)
- return ret;
+ int ret = 0;
ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0fd78ecb67..5dbed91c61 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -485,18 +485,14 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
-BTF_SET8_START(tcp_cubic_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids)
BTF_ID_FLAGS(func, cubictcp_init)
BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
BTF_ID_FLAGS(func, cubictcp_cong_avoid)
BTF_ID_FLAGS(func, cubictcp_state)
BTF_ID_FLAGS(func, cubictcp_cwnd_event)
BTF_ID_FLAGS(func, cubictcp_acked)
-#endif
-#endif
-BTF_SET8_END(tcp_cubic_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index bb23bb5b38..8a45a4aea9 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -58,7 +58,18 @@ struct dctcp {
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
-module_param(dctcp_shift_g, uint, 0644);
+
+static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(val, kp, 0, 10);
+}
+
+static const struct kernel_param_ops dctcp_shift_g_ops = {
+ .set = dctcp_shift_g_set,
+ .get = param_get_uint,
+};
+
+module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644);
MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha");
static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
@@ -260,18 +271,14 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
-BTF_SET8_START(tcp_dctcp_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
BTF_ID_FLAGS(func, dctcp_init)
BTF_ID_FLAGS(func, dctcp_update_alpha)
BTF_ID_FLAGS(func, dctcp_cwnd_event)
BTF_ID_FLAGS(func, dctcp_ssthresh)
BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
-#endif
-#endif
-BTF_SET8_END(tcp_dctcp_check_kfunc_ids)
+BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 4cbe4b4442..f428ecf912 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -222,6 +222,7 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler tcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = tcp_diag_dump,
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df7b13f0e5..ecd5211085 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
@@ -563,19 +564,20 @@ static void tcp_init_buffer_space(struct sock *sk)
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
- tp->window_clamp = maxwin;
+ WRITE_ONCE(tp->window_clamp, maxwin);
if (tcp_app_win && maxwin > 4 * tp->advmss)
- tp->window_clamp = max(maxwin -
- (maxwin >> tcp_app_win),
- 4 * tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(maxwin - (maxwin >> tcp_app_win),
+ 4 * tp->advmss));
}
/* Force reservation of one segment. */
if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
- tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(2 * tp->advmss, maxwin - tp->advmss));
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -752,8 +754,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) {
u64 rcvwin, grow;
int rcvbuf;
@@ -769,11 +770,22 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- if (rcvbuf > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+ if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
- /* Make the window clamp follow along. */
- tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
+ /* Make the window clamp follow along. */
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
+ }
+ } else {
+ /* Make the window clamp follow along while being bounded
+ * by SO_RCVBUF.
+ */
+ int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf));
+
+ if (clamp > tp->window_clamp)
+ WRITE_ONCE(tp->window_clamp, clamp);
}
}
tp->rcvq_space.space = copied;
@@ -911,7 +923,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk);
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
} else {
/* no previous measure. */
@@ -921,7 +933,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
tp->srtt_us = max(1U, srtt);
}
@@ -1164,7 +1176,7 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
* L|R 1 - orig is lost, retransmit is in flight.
* S|R 1 - orig reached receiver, retrans is still in flight.
* (L|S|R is logically valid, it could occur when L|R is sacked,
- * but it is equivalent to plain S and code short-curcuits it to S.
+ * but it is equivalent to plain S and code short-circuits it to S.
* L|S is logically invalid, it would mean -1 packet in flight 8))
*
* These 6 states form finite state machine, controlled by the following events:
@@ -2126,8 +2138,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
static inline void tcp_init_undo(struct tcp_sock *tp)
{
tp->undo_marker = tp->snd_una;
+
/* Retransmission still in flight may cause DSACKs later. */
- tp->undo_retrans = tp->retrans_out ? : -1;
+ /* First, account for regular retransmits in flight: */
+ tp->undo_retrans = tp->retrans_out;
+ /* Next, account for TLP retransmits in flight: */
+ if (tp->tlp_high_seq && tp->tlp_retrans)
+ tp->undo_retrans++;
+ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
+ if (!tp->undo_retrans)
+ tp->undo_retrans = -1;
}
static bool tcp_is_rack(const struct sock *sk)
@@ -2206,6 +2226,7 @@ void tcp_enter_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
@@ -2779,13 +2800,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
+/* Sometimes we deduce that packets have been dropped due to reasons other than
+ * congestion, like path MTU reductions or failed client TFO attempts. In these
+ * cases we call this function to retransmit as many packets as cwnd allows,
+ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
+ * non-zero value (and may do so in a later calling context due to TSQ), we
+ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
+ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
+ * are using the correct retrans_stamp and don't declare ETIMEDOUT
+ * prematurely).
+ */
+static void tcp_non_congestion_loss_retransmit(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (icsk->icsk_ca_state != TCP_CA_Loss) {
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_ssthresh = 0;
+ tp->undo_marker = 0;
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Do a simple retransmit without using the backoff mechanisms in
* tcp_timer. This is used for path mtu discovery.
* The socket is already locked here.
*/
void tcp_simple_retransmit(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int mss;
@@ -2825,14 +2870,7 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
- if (icsk->icsk_ca_state != TCP_CA_Loss) {
- tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_ssthresh = 0;
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Loss);
- }
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
@@ -3057,7 +3095,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
return;
if (tcp_try_undo_dsack(sk))
- tcp_try_keep_open(sk);
+ tcp_try_to_open(sk, flag);
tcp_identify_packet_loss(sk, ack_flag);
if (icsk->icsk_ca_state != TCP_CA_Recovery) {
@@ -3539,7 +3577,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cong_control) {
- icsk->icsk_ca_ops->cong_control(sk, rs);
+ icsk->icsk_ca_ops->cong_control(sk, ack, flag, rs);
return;
}
@@ -4204,6 +4242,13 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
+#ifdef CONFIG_TCP_AO
+ case TCPOPT_AO:
+ /* TCP AO has already been checked
+ * (see tcp_inbound_ao_hash()).
+ */
+ break;
+#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -4433,9 +4478,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
return SKB_NOT_DROPPED_YET;
}
+
+void tcp_done_with_error(struct sock *sk, int err)
+{
+ /* This barrier is coupled with smp_rmb() in tcp_poll() */
+ WRITE_ONCE(sk->sk_err, err);
+ smp_wmb();
+
+ tcp_write_queue_purge(sk);
+ tcp_done(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
+}
+EXPORT_SYMBOL(tcp_done_with_error);
+
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
+ int err;
+
trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
@@ -4447,24 +4509,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- WRITE_ONCE(sk->sk_err, ECONNREFUSED);
+ err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
- WRITE_ONCE(sk->sk_err, EPIPE);
+ err = EPIPE;
break;
case TCP_CLOSE:
return;
default:
- WRITE_ONCE(sk->sk_err, ECONNRESET);
+ err = ECONNRESET;
}
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
-
- tcp_write_queue_purge(sk);
- tcp_done(sk);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+ tcp_done_with_error(sk, err);
}
/*
@@ -4803,10 +4858,8 @@ static bool tcp_try_coalesce(struct sock *sk,
if (!mptcp_skb_can_collapse(to, from))
return false;
-#ifdef CONFIG_TLS_DEVICE
- if (from->decrypted != to->decrypted)
+ if (skb_cmp_decrypted(from, to))
return false;
-#endif
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
@@ -5174,6 +5227,16 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
if (tcp_receive_window(tp) == 0) {
+ /* Some stacks are known to send bare FIN packets
+ * in a loop even if we send RWIN 0 in our ACK.
+ * Accepting this FIN does not hurt memory pressure
+ * because the FIN flag will simply be merged to the
+ * receive queue tail skb in most cases.
+ */
+ if (!skb->len &&
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+ goto queue_and_out;
+
reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
@@ -5188,7 +5251,7 @@ queue_and_out:
inet_csk_schedule_ack(sk);
sk->sk_data_ready(sk);
- if (skb_queue_len(&sk->sk_receive_queue)) {
+ if (skb_queue_len(&sk->sk_receive_queue) && skb->len) {
reason = SKB_DROP_REASON_PROTO_MEM;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
goto drop;
@@ -5375,9 +5438,7 @@ restart:
break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
-#ifdef CONFIG_TLS_DEVICE
- nskb->decrypted = skb->decrypted;
-#endif
+ skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
if (list)
__skb_queue_before(list, skb, nskb);
@@ -5407,10 +5468,8 @@ restart:
!mptcp_skb_can_collapse(nskb, skb) ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
-#ifdef CONFIG_TLS_DEVICE
- if (skb->decrypted != nskb->decrypted)
+ if (skb_cmp_decrypted(skb, nskb))
goto end;
-#endif
}
}
}
@@ -6288,7 +6347,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data)
tcp_mark_skb_lost(sk, data);
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
@@ -6361,6 +6420,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
inet_csk_reset_xmit_timer(sk,
ICSK_TIME_RETRANS,
TCP_TIMEOUT_MIN, TCP_RTO_MAX);
+ SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE);
goto reset_and_undo;
}
@@ -6369,6 +6429,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_ts(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto reset_and_undo;
}
@@ -6424,7 +6485,8 @@ consume:
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp, 65535U);
+ WRITE_ONCE(tp->window_clamp,
+ min(tp->window_clamp, 65535U));
}
if (tp->rx_opt.saw_tstamp) {
@@ -6572,7 +6634,8 @@ discard_and_undo:
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
- return 1;
+ /* we can reuse/return @reason to its caller to handle the exception */
+ return reason;
}
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
@@ -6616,14 +6679,14 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
* address independent.
*/
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
+enum skb_drop_reason
+tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
- bool acceptable;
SKB_DR(reason);
switch (sk->sk_state) {
@@ -6633,7 +6696,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_LISTEN:
if (th->ack)
- return 1;
+ return SKB_DROP_REASON_TCP_FLAGS;
if (th->rst) {
SKB_DR_SET(reason, TCP_RESET);
@@ -6649,12 +6712,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
*/
rcu_read_lock();
local_bh_disable();
- acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ icsk->icsk_af_ops->conn_request(sk, skb);
local_bh_enable();
rcu_read_unlock();
- if (!acceptable)
- return 1;
consume_skb(skb);
return 0;
}
@@ -6699,17 +6760,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
/* step 5: check the ACK field */
- acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT |
- FLAG_NO_CHALLENGE_ACK) > 0;
-
- if (!acceptable) {
- if (sk->sk_state == TCP_SYN_RECV)
- return 1; /* send one RST */
- tcp_send_challenge_ack(sk);
- SKB_DR_SET(reason, TCP_OLD_ACK);
- goto discard;
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH |
+ FLAG_UPDATE_TS_RECENT |
+ FLAG_NO_CHALLENGE_ACK);
+
+ if ((int)reason <= 0) {
+ if (sk->sk_state == TCP_SYN_RECV) {
+ /* send one RST */
+ if (!reason)
+ return SKB_DROP_REASON_TCP_OLD_ACK;
+ return -reason;
+ }
+ /* accept old ack during closing */
+ if ((int)reason < 0) {
+ tcp_send_challenge_ack(sk);
+ reason = -reason;
+ goto discard;
+ }
}
+ SKB_DR_SET(reason, NOT_SPECIFIED);
switch (sk->sk_state) {
case TCP_SYN_RECV:
tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
@@ -6752,6 +6821,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ tcp_shutdown(sk, SEND_SHUTDOWN);
break;
case TCP_FIN_WAIT1: {
@@ -6777,7 +6848,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (READ_ONCE(tp->linger2) < 0) {
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
@@ -6786,7 +6857,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_fastopen_active_disable(sk);
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
tmo = tcp_fin_time(sk);
@@ -6851,7 +6922,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk, skb);
- return 1;
+ return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
}
}
fallthrough;
@@ -6990,7 +7061,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
* Return true if a syncookie should be sent
*/
-static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+static bool tcp_syn_flood_action(struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
@@ -7091,7 +7162,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
- __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@@ -7101,21 +7171,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
+ u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+ isn = __this_cpu_read(tcp_tw_isn);
+ if (isn) {
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ __this_cpu_write(tcp_tw_isn, 0);
+ } else {
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
- if (!want_cookie)
- goto drop;
+ if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
+ want_cookie = tcp_syn_flood_action(sk,
+ rsk_ops->slab_name);
+ if (!want_cookie)
+ goto drop;
+ }
}
if (sk_acceptq_is_full(sk)) {
@@ -7154,7 +7231,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
- dst = af_ops->route_req(sk, skb, &fl, req);
+ dst = af_ops->route_req(sk, skb, &fl, req, isn);
if (!dst)
goto drop_and_free;
@@ -7231,7 +7308,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie) {
req->timeout = tcp_timeout_init((struct sock *)req);
- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
+ req->timeout))) {
+ reqsk_free(req);
+ return 0;
+ }
+
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c50c5a32b..a541659b65 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -154,6 +155,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
tcptw->tw_ts_recent_stamp)))) {
+ /* inet_twsk_hashdance() sets sk_refcnt after putting twsk
+ * and releasing the bucket lock.
+ */
+ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+ return 0;
+
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
@@ -174,7 +181,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
- sock_hold(sktw);
+
return 1;
}
@@ -604,15 +611,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
-
- sk_error_report(sk);
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
}
@@ -723,7 +725,8 @@ out:
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -866,11 +869,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk) {
+ if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- if (sk_fullsock(sk))
- trace_tcp_send_reset(sk, skb);
- }
+
+ trace_tcp_send_reset(sk, skb, reason);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -1137,14 +1139,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
#endif
}
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent),
0, &key,
@@ -1667,7 +1664,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
tcp_v4_init_req(req, sk, skb);
@@ -1907,7 +1905,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (tcp_checksum_complete(skb))
goto csum_err;
@@ -1915,9 +1912,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
struct sock *nsk = tcp_v4_cookie_check(sk, skb);
if (!nsk)
- goto discard;
+ return 0;
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb)) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason) {
rsk = nsk;
goto reset;
}
@@ -1926,14 +1924,15 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb)) {
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason) {
rsk = sk;
goto reset;
}
return 0;
reset:
- tcp_v4_send_reset(rsk, skb);
+ tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
discard:
kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
@@ -1994,7 +1993,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason *reason)
{
- u32 limit, tail_gso_size, tail_gso_segs;
+ u32 tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -2003,6 +2002,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
bool fragstolen;
u32 gso_segs;
u32 gso_size;
+ u64 limit;
int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -2044,10 +2044,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
- tail->decrypted != skb->decrypted ||
-#endif
!mptcp_skb_can_collapse(tail, skb) ||
+ skb_cmp_decrypted(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
@@ -2100,7 +2098,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
__skb_push(skb, hdrlen);
no_coalesce:
- limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+ /* sk->sk_backlog.len is reset only at the end of __release_sock().
+ * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+ * sk_rcvbuf in normal conditions.
+ */
+ limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+ limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
/* Only socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
@@ -2108,6 +2112,8 @@ no_coalesce:
*/
limit += 64 * 1024;
+ limit = min_t(u64, limit, UINT_MAX);
+
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
@@ -2147,7 +2153,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2169,6 +2174,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
+ u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -2206,7 +2212,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -2275,15 +2280,21 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v4_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v4_send_reset(nsk, skb, rst_reason);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
}
+process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
@@ -2354,7 +2365,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(NULL, skb);
+ tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -2382,7 +2393,7 @@ do_time_wait:
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@@ -2396,6 +2407,7 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -2405,7 +2417,7 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v4_send_reset(sk, skb);
+ tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
@@ -2415,7 +2427,6 @@ do_time_wait:
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
};
@@ -3498,7 +3509,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- tcp_twsk_purge(net_exit_list, AF_INET);
+ tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c2a9255385..b01eb6d944 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] =
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
[TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
.len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
@@ -766,6 +767,7 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
unsigned int max_rows = 1U << tcp_metrics_hash_log;
unsigned int row, s_row = cb->args[0];
int s_col = cb->args[1], col = s_col;
+ int res = 0;
for (row = s_row; row < max_rows; row++, s_col = 0) {
struct tcp_metrics_block *tm;
@@ -778,7 +780,8 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
continue;
if (col < s_col)
continue;
- if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
+ res = tcp_metrics_dump_info(skb, cb, tm);
+ if (res < 0) {
rcu_read_unlock();
goto done;
}
@@ -789,7 +792,7 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
done:
cb->args[0] = row;
cb->args[1] = col;
- return skb->len;
+ return res;
}
static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
@@ -986,6 +989,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.maxattr = TCP_METRICS_ATTR_MAX,
.policy = tcp_metrics_nl_policy,
.netnsok = true,
+ .parallel_ops = true,
.module = THIS_MODULE,
.small_ops = tcp_metrics_nl_ops,
.n_small_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ecc7311dc..0fbebf6266 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
@@ -95,7 +96,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
- const struct tcphdr *th)
+ const struct tcphdr *th, u32 *tw_isn)
{
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -228,7 +229,7 @@ kill:
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
- TCP_SKB_CB(skb)->tcp_tw_isn = isn;
+ *tw_isn = isn;
return TCP_TW_SYN;
}
@@ -388,7 +389,7 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
-void tcp_twsk_purge(struct list_head *net_exit_list, int family)
+void tcp_twsk_purge(struct list_head *net_exit_list)
{
bool purged_once = false;
struct net *net;
@@ -396,14 +397,13 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family)
list_for_each_entry(net, net_exit_list, exit_list) {
if (net->ipv4.tcp_death_row.hashinfo->pernet) {
/* Even if tw_refcount == 1, we must clean up kernel reqsk */
- inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
+ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo);
} else if (!purged_once) {
- inet_twsk_purge(&tcp_hashinfo, family);
+ inet_twsk_purge(&tcp_hashinfo);
purged_once = true;
}
}
}
-EXPORT_SYMBOL_GPL(tcp_twsk_purge);
/* Warning : This function is called without sk_listener being locked.
* Be sure to read socket fields once, as their value could change under us.
@@ -515,9 +515,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
const struct tcp_sock *oldtp;
struct tcp_sock *newtp;
u32 seq;
-#ifdef CONFIG_TCP_AO
- struct tcp_ao_key *ao_key;
-#endif
if (!newsk)
return NULL;
@@ -608,10 +605,14 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
#endif
#ifdef CONFIG_TCP_AO
newtp->ao_info = NULL;
- ao_key = treq->af_specific->ao_lookup(sk, req,
- tcp_rsk(req)->ao_keyid, -1);
- if (ao_key)
- newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
+
+ if (tcp_rsk_used_ao(req)) {
+ struct tcp_ao_key *ao_key;
+
+ ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1);
+ if (ao_key)
+ newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
+ }
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
@@ -783,8 +784,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
+ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ tcp_rsk(req)->rcv_nxt,
+ tcp_rsk(req)->rcv_nxt +
+ tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
@@ -879,7 +883,7 @@ embryonic_reset:
* avoid becoming vulnerable to outside attack aiming at
* resetting legit local connections.
*/
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN);
} else if (fastopen) { /* received a valid RST pkt */
reqsk_fastopen_remove(sk, req, true);
tcp_reset(sk, skb);
@@ -907,11 +911,11 @@ EXPORT_SYMBOL(tcp_check_req);
* be created.
*/
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb)
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb)
__releases(&((child)->sk_lock.slock))
{
- int ret = 0;
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
int state = child->sk_state;
/* record sk_napi_id and sk_rx_queue_mapping of child. */
@@ -919,7 +923,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
- ret = tcp_rcv_state_process(child, skb);
+ reason = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
parent->sk_data_ready(parent);
@@ -933,6 +937,6 @@ int tcp_child_process(struct sock *parent, struct sock *child,
bh_unlock_sock(child);
sock_put(child);
- return ret;
+ return reason;
}
EXPORT_SYMBOL(tcp_child_process);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8311c38267..e4ad3311e1 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
}
}
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == newip && *oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+
+ csum_replace4(&iph->check, *oldip, newip);
+ *oldip = newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct iphdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct iphdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, iph->saddr,
+ &th2->source, th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, iph->daddr,
+ &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -73,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
@@ -178,63 +248,76 @@ out:
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
- struct sk_buff *pp = NULL;
+ struct tcphdr *th2;
struct sk_buff *p;
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ return p;
+ }
+
+ return NULL;
+}
+
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
th = skb_gro_header(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
- goto out;
+ return NULL;
hlen = off + thlen;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
}
skb_gro_pull(skb, thlen);
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- list_for_each_entry(p, head, list) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
+ return th;
+}
- th2 = tcp_hdr(p);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ unsigned int thlen = th->doff * 4;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th2;
+ unsigned int len;
+ __be32 flags;
+ unsigned int mss = 1;
+ int flush = 1;
+ int i;
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
- goto found;
- }
- p = NULL;
- goto out_check_final;
+ p = tcp_gro_lookup(head, th);
+ if (!p)
+ goto out_check_final;
-found:
- /* Include the IP ID check below from the inner most IP hdr */
- flush = NAPI_GRO_CB(p)->flush;
- flush |= (__force int)(flags & TCP_FLAG_CWR);
+ th2 = tcp_hdr(p);
+ flush = (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
@@ -242,16 +325,7 @@ found:
flush |= *(u32 *)((u8 *)th + i) ^
*(u32 *)((u8 *)th2 + i);
- /* When we receive our second frame we can made a decision on if we
- * continue this flow as an atomic flow with a fixed ID or if we use
- * an incrementing ID.
- */
- if (NAPI_GRO_CB(p)->flush_id != 1 ||
- NAPI_GRO_CB(p)->count != 1 ||
- !NAPI_GRO_CB(p)->is_atomic)
- flush |= NAPI_GRO_CB(p)->flush_id;
- else
- NAPI_GRO_CB(p)->is_atomic = false;
+ flush |= gro_receive_network_flush(th, th2, p);
mss = skb_shinfo(p)->gso_size;
@@ -265,9 +339,19 @@ found:
flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
-#ifdef CONFIG_TLS_DEVICE
- flush |= p->decrypted ^ skb->decrypted;
-#endif
+ flush |= skb_cmp_decrypted(p, skb);
+
+ if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
@@ -290,7 +374,6 @@ out_check_final:
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
pp = p;
-out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
return pp;
@@ -299,60 +382,110 @@ out:
void tcp_gro_complete(struct sk_buff *skb)
{
struct tcphdr *th = tcp_hdr(skb);
+ struct skb_shared_info *shinfo;
+
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
skb->csum_start = (unsigned char *)th - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
- skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ shinfo = skb_shinfo(skb);
+ shinfo->gso_segs = NAPI_GRO_CB(skb)->count;
if (th->cwr)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
-
- if (skb->encapsulation)
- skb->inner_transport_header = skb->transport_header;
+ shinfo->gso_type |= SKB_GSO_TCP_ECN;
}
EXPORT_SYMBOL(tcp_gro_complete);
+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ const struct iphdr *iph;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
+ iph = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source,
+ iph->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- inet_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ inet_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp4_check_fraglist_gro(head, skb, th);
- return tcp_gro_receive(head, skb);
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
- if (NAPI_GRO_CB(skb)->is_atomic)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
+ (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID);
tcp_gro_complete(skb);
return 0;
}
-static const struct net_offload tcpv4_offload = {
- .callbacks = {
- .gso_segment = tcp4_gso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
- },
-};
-
int __init tcpv4_offload_init(void)
{
- return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
+ net_hotdata.tcpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp4_gso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3167ad965..95618d0e78 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,11 +39,13 @@
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
+#include <linux/skbuff_ref.h>
#include <trace/events/tcp.h>
@@ -203,16 +205,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
* This MUST be enforced by all callers.
*/
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- __u32 *rcv_wnd, __u32 *window_clamp,
+ __u32 *rcv_wnd, __u32 *__window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
+ u32 window_clamp = READ_ONCE(*__window_clamp);
/* If no clamp set the clamp to the max possible scaled window */
- if (*window_clamp == 0)
- (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
- space = min(*window_clamp, space);
+ if (window_clamp == 0)
+ window_clamp = (U16_MAX << TCP_MAX_WSCALE);
+ space = min(window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
@@ -229,7 +232,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
- (*rcv_wnd) = min_t(u32, space, U16_MAX);
+ (*rcv_wnd) = space;
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
@@ -239,12 +242,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
- space = min_t(u32, space, *window_clamp);
+ space = min_t(u32, space, window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
+ WRITE_ONCE(*__window_clamp,
+ min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -1499,18 +1503,22 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
}
/* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
+static int tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
+ int tso_segs;
+
if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- tcp_skb_pcount_set(skb, 1);
TCP_SKB_CB(skb)->tcp_gso_size = 0;
- } else {
- tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
- TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
+ tcp_skb_pcount_set(skb, 1);
+ return 1;
}
+ TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
+ tso_segs = DIV_ROUND_UP(skb->len, mss_now);
+ tcp_skb_pcount_set(skb, tso_segs);
+ return tso_segs;
}
/* Pcount in the middle of the write queue got changed, we need to do various
@@ -2070,16 +2078,10 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
/* Can at least one segment of SKB be sent right now, according to the
* congestion window rules? If so, return how many segments are allowed.
*/
-static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
- const struct sk_buff *skb)
+static u32 tcp_cwnd_test(const struct tcp_sock *tp)
{
u32 in_flight, cwnd, halfcwnd;
- /* Don't be strict about the congestion window for the final FIN. */
- if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
- tcp_skb_pcount(skb) == 1)
- return 1;
-
in_flight = tcp_packets_in_flight(tp);
cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
@@ -2100,10 +2102,9 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
- if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
- tcp_set_skb_tso_segs(skb, mss_now);
- tso_segs = tcp_skb_pcount(skb);
- }
+ if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now))
+ return tcp_set_skb_tso_segs(skb, mss_now);
+
return tso_segs;
}
@@ -2403,6 +2404,21 @@ commit:
return 0;
}
+/* tcp_mtu_probe() and tcp_grow_skb() can both eat an skb (src) if
+ * all its payload was moved to another one (dst).
+ * Make sure to transfer tcp_flags, eor, and tstamp.
+ */
+static void tcp_eat_one_skb(struct sock *sk,
+ struct sk_buff *dst,
+ struct sk_buff *src)
+{
+ TCP_SKB_CB(dst)->tcp_flags |= TCP_SKB_CB(src)->tcp_flags;
+ TCP_SKB_CB(dst)->eor = TCP_SKB_CB(src)->eor;
+ tcp_skb_collapse_tstamp(dst, src);
+ tcp_unlink_write_queue(src, sk);
+ tcp_wmem_free_skb(sk, src);
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -2508,16 +2524,7 @@ static int tcp_mtu_probe(struct sock *sk)
copy = min_t(int, skb->len, probe_size - len);
if (skb->len <= copy) {
- /* We've eaten all the data from this skb.
- * Throw it away. */
- TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
- /* If this is the last SKB we copy and eor is set
- * we need to propagate it to the new skb.
- */
- TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
- tcp_skb_collapse_tstamp(nskb, skb);
- tcp_unlink_write_queue(skb, sk);
- tcp_wmem_free_skb(sk, skb);
+ tcp_eat_one_skb(sk, nskb, skb);
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
@@ -2683,6 +2690,35 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
}
+/* First skb in the write queue is smaller than ideal packet size.
+ * Check if we can move payload from the second skb in the queue.
+ */
+static void tcp_grow_skb(struct sock *sk, struct sk_buff *skb, int amount)
+{
+ struct sk_buff *next_skb = skb->next;
+ unsigned int nlen;
+
+ if (tcp_skb_is_last(sk, skb))
+ return;
+
+ if (!tcp_skb_can_collapse(skb, next_skb))
+ return;
+
+ nlen = min_t(u32, amount, next_skb->len);
+ if (!nlen || !skb_shift(skb, next_skb, nlen))
+ return;
+
+ TCP_SKB_CB(skb)->end_seq += nlen;
+ TCP_SKB_CB(next_skb)->seq += nlen;
+
+ if (!next_skb->len) {
+ /* In case FIN is set, we need to update end_seq */
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+
+ tcp_eat_one_skb(sk, skb, next_skb);
+ }
+}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -2703,10 +2739,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int tso_segs, sent_pkts;
- int cwnd_quota;
+ u32 cwnd_quota, max_segs;
int result;
bool is_cwnd_limited = false, is_rwnd_limited = false;
- u32 max_segs;
sent_pkts = 0;
@@ -2724,6 +2759,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
max_segs = tcp_tso_segs(sk, mss_now);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
+ int missing_bytes;
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
@@ -2737,10 +2773,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (tcp_pacing_check(sk))
break;
- tso_segs = tcp_init_tso_segs(skb, mss_now);
- BUG_ON(!tso_segs);
-
- cwnd_quota = tcp_cwnd_test(tp, skb);
+ cwnd_quota = tcp_cwnd_test(tp);
if (!cwnd_quota) {
if (push_one == 2)
/* Force out a loss probe pkt. */
@@ -2748,6 +2781,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
else
break;
}
+ cwnd_quota = min(cwnd_quota, max_segs);
+ missing_bytes = cwnd_quota * mss_now - skb->len;
+ if (missing_bytes > 0)
+ tcp_grow_skb(sk, skb, missing_bytes);
+
+ tso_segs = tcp_set_skb_tso_segs(skb, mss_now);
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
is_rwnd_limited = true;
@@ -2769,9 +2808,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- min_t(unsigned int,
- cwnd_quota,
- max_segs),
+ cwnd_quota,
nonagle);
if (skb->len > limit &&
@@ -3387,11 +3424,6 @@ start:
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
- /* To avoid taking spuriously low RTT samples based on a timestamp
- * for a transmit that never happened, always mark EVER_RETRANS
- */
- TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
-
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
TCP_SKB_CB(skb)->seq, segs, err);
@@ -3401,6 +3433,12 @@ start:
} else if (err != -EBUSY) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
+
+ /* To avoid taking spuriously low RTT samples based on a timestamp
+ * for a transmit that never happened, always mark EVER_RETRANS
+ */
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+
return err;
}
@@ -3563,7 +3601,9 @@ void tcp_send_fin(struct sock *sk)
return;
}
} else {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC |
+ __GFP_NOWARN));
if (unlikely(!skb))
return;
@@ -3583,7 +3623,8 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by RFC 2525, section 2.17. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, gfp_t priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason)
{
struct sk_buff *skb;
@@ -3608,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL);
+ trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
}
/* Send a crossed SYN-ACK during socket establishment.
@@ -3855,7 +3896,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_ca_dst_init(sk, dst);
if (!tp->window_clamp)
- tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+ WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
tcp_initialize_rcv_mss(sk);
@@ -3863,7 +3904,7 @@ static void tcp_connect_init(struct sock *sk)
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
- tp->window_clamp = tcp_full_space(sk);
+ WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
rcv_wnd = tcp_rwnd_init_bpf(sk);
if (rcv_wnd == 0)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d1ad20ce1c..4d40615dc8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,10 +22,11 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>
+#include <net/rstreason.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed, user_timeout;
s32 remaining;
@@ -47,7 +48,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
u32 remaining, user_timeout;
s32 elapsed;
@@ -73,11 +74,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk)
{
- WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
- sk_error_report(sk);
-
- tcp_write_queue_purge(sk);
- tcp_done(sk);
+ tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
@@ -127,7 +124,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
if (do_reset)
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -481,11 +479,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct sk_buff *skb,
u32 rtx_delta)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- const int timeout = TCP_RTO_MAX * 2;
- u32 rcv_delta;
+ int timeout = TCP_RTO_MAX * 2;
+ s32 rcv_delta;
- rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ if (user_timeout) {
+ /* If user application specified a TCP_USER_TIMEOUT,
+ * it does not want win 0 packets to 'reset the timer'
+ * while retransmits are not making progress.
+ */
+ if (rtx_delta > user_timeout)
+ return true;
+ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+ }
+ /* Note: timer interrupt might have been delayed by at least one jiffy,
+ * and tp->rcv_tstamp might very well have been written recently.
+ * rcv_delta can thus be negative.
+ */
+ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
@@ -530,8 +543,6 @@ void tcp_retransmit_timer(struct sock *sk)
if (WARN_ON_ONCE(!skb))
return;
- tp->tlp_high_seq = 0;
-
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
@@ -768,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
goto death;
}
@@ -795,7 +806,8 @@ static void tcp_keepalive_timer (struct timer_list *t)
icsk->icsk_probes_out > 0) ||
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 40282a3418..578668878a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -326,6 +326,8 @@ found:
goto fail_unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -342,7 +344,7 @@ found:
hslot2->count++;
spin_unlock(&hslot2->lock);
}
- sock_set_flag(sk, SOCK_RCU_FREE);
+
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -411,8 +413,6 @@ INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
- static u32 udp_ehash_secret __read_mostly;
-
net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
@@ -429,15 +429,21 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
+ bool need_rescore;
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ need_rescore = false;
+rescore:
+ score = compute_score(need_rescore ? result : sk, net, saddr,
+ sport, daddr, hnum, dif, sdif);
if (score > badness) {
badness = score;
+ if (need_rescore)
+ continue;
+
if (sk->sk_state == TCP_ESTABLISHED) {
result = sk;
continue;
@@ -458,9 +464,14 @@ static struct sock *udp4_lib_lookup2(struct net *net,
if (IS_ERR(result))
continue;
- badness = compute_score(result, net, saddr, sport,
- daddr, hnum, dif, sdif);
-
+ /* compute_score is too long of a function to be
+ * inlined, and calling it again here yields
+ * measureable overhead for some
+ * workloads. Work around it by jumping
+ * backwards to rescore 'result'.
+ */
+ need_rescore = true;
+ goto rescore;
}
}
return result;
@@ -534,7 +545,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
@@ -1208,7 +1220,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (connected)
- rt = (struct rtable *)sk_dst_check(sk, 0);
+ rt = dst_rtable(sk_dst_check(sk, 0));
if (!rt) {
struct net *net = sock_net(sk);
@@ -1502,13 +1514,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
- int size;
+ bool becomes_readable;
+ int size, rcvbuf;
- /* try to avoid the costly atomic add/sub pair when the receive
- * queue is full; always allow at least a packet
+ /* Immediately drop when the receive queue is full.
+ * Always allow at least one packet.
*/
rmem = atomic_read(&sk->sk_rmem_alloc);
- if (rmem > sk->sk_rcvbuf)
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ if (rmem > rcvbuf)
goto drop;
/* Under mem pressure, it might be helpful to help udp_recvmsg()
@@ -1517,7 +1531,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* - Less cache line misses at copyout() time
* - Less work at consume_skb() (less alien page frag freeing)
*/
- if (rmem > (sk->sk_rcvbuf >> 1)) {
+ if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
busy = busylock_acquire(sk);
@@ -1525,12 +1539,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
size = skb->truesize;
udp_set_dev_scratch(skb);
- /* we drop only if the receive buf is full and the receive
- * queue contains some other skb
- */
- rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
- if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
- goto uncharge_drop;
+ atomic_add(size, &sk->sk_rmem_alloc);
spin_lock(&list->lock);
err = udp_rmem_schedule(sk, size);
@@ -1546,12 +1555,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
+ becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
- if (!sock_flag(sk, SOCK_DEAD))
- INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
-
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ if (becomes_readable ||
+ sk->sk_data_ready != sock_def_readable ||
+ READ_ONCE(sk->sk_peek_off) >= 0)
+ INDIRECT_CALL_1(sk->sk_data_ready,
+ sock_def_readable, sk);
+ else
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
busylock_release(busy);
return 0;
@@ -1597,7 +1613,8 @@ int udp_init_sock(struct sock *sk)
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
- sk_peek_offset_bwd(sk, len);
+ if (unlikely(READ_ONCE(udp_sk(sk)->peeking_with_offset)))
+ sk_peek_offset_bwd(sk, len);
if (!skb_unref(skb))
return;
@@ -2058,8 +2075,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -2577,11 +2594,12 @@ int udp_v4_early_demux(struct sk_buff *skb)
uh->source, iph->saddr, dif, sdif);
}
- if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+ if (!sk)
return 0;
skb->sk = sk;
- skb->destructor = sock_efree;
+ DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+ skb->destructor = sock_pfree;
dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
@@ -2696,8 +2714,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
- fallthrough;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
WRITE_ONCE(up->encap_rcv,
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index dc41a22ee8..38cb3a28e4 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -237,6 +237,7 @@ static int udplite_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler udp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udp_diag_dump,
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
@@ -260,6 +261,7 @@ static int udplite_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler udplite_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udplite_diag_dump,
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index c3d67423ae..ee9af92155 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb)))
+ return ERR_PTR(-EINVAL);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
@@ -433,33 +437,6 @@ out:
return segs;
}
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
- if (unlikely(p->len + skb->len >= 65536))
- return -E2BIG;
-
- if (NAPI_GRO_CB(p)->last == p)
- skb_shinfo(p)->frag_list = skb;
- else
- NAPI_GRO_CB(p)->last->next = skb;
-
- skb_pull(skb, skb_gro_offset(skb));
-
- NAPI_GRO_CB(p)->last = skb;
- NAPI_GRO_CB(p)->count++;
- p->data_len += skb->len;
-
- /* sk ownership - if any - completely transferred to the aggregated packet */
- skb->destructor = NULL;
- skb->sk = NULL;
- p->truesize += skb->truesize;
- p->len += skb->len;
-
- NAPI_GRO_CB(skb)->same_flow = 1;
-
- return 0;
-}
-
#define UDP_GRO_CNT_MAX 64
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
@@ -471,6 +448,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *p;
unsigned int ulen;
int ret = 0;
+ int flush;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -504,13 +482,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return p;
}
+ flush = gro_receive_network_flush(uh, uh2, p);
+
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values.
* On len mismatch merge the first packet shorter than gso_size,
* otherwise complete the GRO packet.
*/
- if (ulen > ntohs(uh2->len)) {
+ if (ulen > ntohs(uh2->len) || flush) {
pp = p;
} else {
if (NAPI_GRO_CB(skb)->is_flist) {
@@ -718,7 +698,8 @@ EXPORT_SYMBOL(udp_gro_complete);
INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
@@ -740,15 +721,14 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
-static const struct net_offload udpv4_offload = {
- .callbacks = {
- .gso_segment = udp4_ufo_fragment,
- .gro_receive = udp4_gro_receive,
- .gro_complete = udp4_gro_complete,
- },
-};
-
int __init udpv4_offload_init(void)
{
- return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+ net_hotdata.udpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp4_gro_receive,
+ .gro_complete = udp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP);
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 860aff5f85..e4e0fa869f 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -183,7 +183,8 @@ void udp_tunnel_sock_release(struct socket *sock)
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
- __be16 flags, __be64 tunnel_id, int md_size)
+ const unsigned long *flags,
+ __be64 tunnel_id, int md_size)
{
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
@@ -199,7 +200,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
info->key.tp_src = udp_hdr(skb)->source;
info->key.tp_dst = udp_hdr(skb)->dest;
if (udp_hdr(skb)->check)
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
return tun_dst;
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index c54676998e..a620618cc5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -58,12 +58,16 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return -iph->protocol;
#endif
- __skb_push(skb, skb->data - skb_network_header(skb));
+ __skb_push(skb, -skb_network_offset(skb));
iph->tot_len = htons(skb->len);
ip_send_check(iph);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -113,19 +117,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c33bca2c38..0294fef577 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif,
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rtable *rt = (struct rtable *)xdst->route;
+ struct rtable *rt = dst_rtable(xdst->route);
const struct flowi4 *fl4 = &fl->u.ip4;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
@@ -152,7 +152,6 @@ static struct ctl_table xfrm4_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __net_init int xfrm4_net_sysctl_init(struct net *net)
@@ -186,7 +185,7 @@ err_alloc:
static __net_exit void xfrm4_net_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!net->ipv4.xfrm4_hdr)
return;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 37d48aa073..4f2c5cc310 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -63,6 +63,7 @@
#include <linux/string.h>
#include <linux/hash.h>
+#include <net/ip_tunnels.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -195,6 +196,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -257,6 +259,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -549,7 +552,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto out;
if ((all || type == NETCONFA_FORWARDING) &&
- nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+ nla_put_s32(skb, NETCONFA_FORWARDING,
+ READ_ONCE(devconf->forwarding)) < 0)
goto nla_put_failure;
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
@@ -558,12 +562,13 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
- nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+ nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
+ READ_ONCE(devconf->proxy_ndp)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- devconf->ignore_routes_with_linkdown) < 0)
+ READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0)
goto nla_put_failure;
out:
@@ -713,7 +718,7 @@ errout:
static u32 inet6_base_seq(const struct net *net)
{
u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
- net->dev_base_seq;
+ READ_ONCE(net->dev_base_seq);
/* Must not return 0 (see nl_dump_check_consistent()).
* Chose a value far away from 0.
@@ -723,17 +728,18 @@ static u32 inet6_base_seq(const struct net *net)
return res;
}
-
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -750,64 +756,46 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = inet6_base_seq(net);
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (inet6_netconf_fill_devconf(skb, dev->ifindex,
- &idev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_netconf_fill_devconf(skb, dev->ifindex,
+ &idev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv6.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -867,7 +855,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
- idev->cnf.forwarding = newf;
+
+ WRITE_ONCE(idev->cnf.forwarding, newf);
if (changed)
dev_forward_change(idev);
}
@@ -884,7 +873,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
@@ -899,7 +888,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_all->forwarding) {
int old_dflt = net->ipv6.devconf_dflt->forwarding;
- net->ipv6.devconf_dflt->forwarding = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf);
if ((!newf) ^ (!old_dflt))
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
@@ -931,7 +920,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
if (idev) {
int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
- idev->cnf.ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf);
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
RTM_NEWNETCONF,
@@ -952,7 +941,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
@@ -966,7 +955,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
}
if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
- net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf);
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
@@ -1270,6 +1259,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
bool del_rt, bool del_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1279,8 +1269,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else {
- if (!(f6i->fib6_flags & RTF_EXPIRES))
+ if (!(f6i->fib6_flags & RTF_EXPIRES)) {
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
fib6_info_release(f6i);
}
}
@@ -1346,12 +1343,21 @@ out:
in6_ifa_put(ifp);
}
+static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev)
+{
+ return READ_ONCE(idev->cnf.regen_min_advance) +
+ READ_ONCE(idev->cnf.regen_max_retry) *
+ READ_ONCE(idev->cnf.dad_transmits) *
+ max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+}
+
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
{
struct inet6_dev *idev = ifp->idev;
unsigned long tmp_tstamp, age;
unsigned long regen_advance;
unsigned long now = jiffies;
+ u32 if_public_preferred_lft;
s32 cnf_temp_preferred_lft;
struct inet6_ifaddr *ift;
struct ifa6_config cfg;
@@ -1363,7 +1369,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
retry:
in6_dev_hold(idev);
- if (idev->cnf.use_tempaddr <= 0) {
+ if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) {
write_unlock_bh(&idev->lock);
pr_info("%s: use_tempaddr is disabled\n", __func__);
in6_dev_put(idev);
@@ -1371,8 +1377,8 @@ retry:
goto out;
}
spin_lock_bh(&ifp->lock);
- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
- idev->cnf.use_tempaddr = -1; /*XXX*/
+ if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) {
+ WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/
spin_unlock_bh(&ifp->lock);
write_unlock_bh(&idev->lock);
pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
@@ -1387,16 +1393,14 @@ retry:
age = (now - ifp->tstamp) / HZ;
- regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ regen_advance = ipv6_get_regen_advance(idev);
/* recalculate max_desync_factor each time and update
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
max_desync_factor = min_t(long,
- idev->cnf.max_desync_factor,
+ READ_ONCE(idev->cnf.max_desync_factor),
cnf_temp_preferred_lft - regen_advance);
if (unlikely(idev->desync_factor > max_desync_factor)) {
@@ -1409,11 +1413,13 @@ retry:
}
}
+ if_public_preferred_lft = ifp->prefered_lft;
+
memset(&cfg, 0, sizeof(cfg));
cfg.valid_lft = min_t(__u32, ifp->valid_lft,
- idev->cnf.temp_valid_lft + age);
+ READ_ONCE(idev->cnf.temp_valid_lft) + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
- cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft);
cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
@@ -1422,19 +1428,41 @@ retry:
write_unlock_bh(&idev->lock);
- /* A temporary address is created only if this calculated Preferred
- * Lifetime is greater than REGEN_ADVANCE time units. In particular,
- * an implementation must not create a temporary address with a zero
- * Preferred Lifetime.
+ /* From RFC 4941:
+ *
+ * A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In
+ * particular, an implementation must not create a temporary address
+ * with a zero Preferred Lifetime.
+ *
+ * ...
+ *
+ * When creating a temporary address, the lifetime values MUST be
+ * derived from the corresponding prefix as follows:
+ *
+ * ...
+ *
+ * * Its Preferred Lifetime is the lower of the Preferred Lifetime
+ * of the public address or TEMP_PREFERRED_LIFETIME -
+ * DESYNC_FACTOR.
+ *
+ * To comply with the RFC's requirements, clamp the preferred lifetime
+ * to a minimum of regen_advance, unless that would exceed valid_lft or
+ * ifp->prefered_lft.
+ *
* Use age calculation as in addrconf_verify to avoid unnecessary
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
if (cfg.preferred_lft <= regen_advance + age) {
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
+ cfg.preferred_lft = regen_advance + age + 1;
+ if (cfg.preferred_lft > cfg.valid_lft ||
+ cfg.preferred_lft > if_public_preferred_lft) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
}
cfg.ifa_flags = IFA_F_TEMPORARY;
@@ -1513,15 +1541,17 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-static bool ipv6_use_optimistic_addr(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
- if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+ if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) &&
+ !READ_ONCE(idev->cnf.use_optimistic))
return false;
return true;
@@ -1530,13 +1560,14 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
-static bool ipv6_allow_optimistic_dad(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_allow_optimistic_dad(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
return true;
@@ -1642,7 +1673,7 @@ static int ipv6_get_saddr_eval(struct net *net,
*/
int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
- score->ifa->idev->cnf.use_tempaddr >= 2;
+ READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2;
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
@@ -1818,7 +1849,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
idev = __in6_dev_get(dst_dev);
if ((dst_type & IPV6_ADDR_MULTICAST) ||
dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
- (idev && idev->cnf.use_oif_addrs_only)) {
+ (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) {
use_oif_addr = true;
}
}
@@ -1842,7 +1873,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
master, &dst,
scores, hiscore_idx);
- if (scores[hiscore_idx].ifa)
+ if (scores[hiscore_idx].ifa &&
+ scores[hiscore_idx].scopedist >= 0)
goto out;
}
@@ -2126,6 +2158,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(idev->dev);
+ int max_addresses;
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2163,9 +2196,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
spin_unlock_bh(&ifp->lock);
- if (idev->cnf.max_addresses &&
- ipv6_count_addresses(idev) >=
- idev->cnf.max_addresses)
+ max_addresses = READ_ONCE(idev->cnf.max_addresses);
+ if (max_addresses &&
+ ipv6_count_addresses(idev) >= max_addresses)
goto lock_errdad;
net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
@@ -2562,11 +2595,11 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
*/
age = (now - ift->cstamp) / HZ;
- max_valid = idev->cnf.temp_valid_lft - age;
+ max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age;
if (max_valid < 0)
max_valid = 0;
- max_prefered = idev->cnf.temp_prefered_lft -
+ max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) -
idev->desync_factor - age;
if (max_prefered < 0)
max_prefered = 0;
@@ -2599,7 +2632,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
create = true;
- if (create && idev->cnf.use_tempaddr > 0) {
+ if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
*/
@@ -2627,7 +2660,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
+ int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses);
struct ifa6_config cfg = {
.pfx = addr,
.plen = pinfo->prefix_len,
@@ -2639,8 +2672,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
};
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((net->ipv6.devconf_all->optimistic_dad ||
- in6_dev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(in6_dev->cnf.optimistic_dad)) &&
!net->ipv6.devconf_all->forwarding && sllao)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -2689,7 +2722,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
*/
update_lft = !create && stored_lft;
- if (update_lft && !in6_dev->cnf.ra_honor_pio_life) {
+ if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
@@ -2698,7 +2731,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
+ WRITE_ONCE(ifp->tstamp, now);
flags = ifp->flags;
ifp->flags &= ~IFA_F_DEPRECATED;
spin_unlock_bh(&ifp->lock);
@@ -2722,6 +2755,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
+ struct fib6_table *table;
__u32 valid_lft;
__u32 prefered_lft;
int addr_type, err;
@@ -2798,11 +2832,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (valid_lft == 0) {
ip6_del_rt(net, rt, false);
rt = NULL;
- } else if (addrconf_finite_timeout(rt_expires)) {
- /* not infinity */
- fib6_set_expires(rt, jiffies + rt_expires);
} else {
- fib6_clean_expires(rt);
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ fib6_set_expires(rt, jiffies + rt_expires);
+ fib6_add_gc_list(rt);
+ } else {
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -2877,7 +2920,7 @@ put:
static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
struct in6_ifreq *ireq)
{
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
@@ -3263,8 +3306,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
struct inet6_ifaddr *ifp;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
- idev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(idev->cnf.optimistic_dad)) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -3443,7 +3486,8 @@ static void addrconf_dev_config(struct net_device *dev)
/* this device type has no EUI support */
if (dev->type == ARPHRD_NONE &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_RANDOM);
addrconf_addr_gen(idev, false);
}
@@ -3621,7 +3665,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
break;
}
@@ -3713,9 +3757,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev->cnf.mtu6 != dev->mtu &&
dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*
@@ -3835,10 +3879,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
*/
if (!unregister && !idev->cnf.disable_ipv6) {
/* aggregate the system setting and interface setting */
- int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+ int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down);
if (!_keep_addr)
- _keep_addr = idev->cnf.keep_addr_on_down;
+ _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down);
keep_addr = (_keep_addr > 0);
}
@@ -3957,7 +4001,7 @@ restart:
ipv6_mc_down(idev);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
@@ -3975,6 +4019,7 @@ static void addrconf_rs_timer(struct timer_list *t)
struct inet6_dev *idev = from_timer(idev, t, rs_timer);
struct net_device *dev = idev->dev;
struct in6_addr lladdr;
+ int rtr_solicits;
write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
@@ -3987,7 +4032,9 @@ static void addrconf_rs_timer(struct timer_list *t)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
+ rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits);
+
+ if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) {
write_unlock(&idev->lock);
if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
ndisc_send_rs(dev, &lladdr,
@@ -3997,11 +4044,12 @@ static void addrconf_rs_timer(struct timer_list *t)
write_lock(&idev->lock);
idev->rs_interval = rfc3315_s14_backoff_update(
- idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
+ idev->rs_interval,
+ READ_ONCE(idev->cnf.rtr_solicit_max_interval));
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
- idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
+ READ_ONCE(idev->cnf.rtr_solicits)) ?
+ READ_ONCE(idev->cnf.rtr_solicit_delay) :
idev->rs_interval);
} else {
/*
@@ -4022,24 +4070,25 @@ put:
*/
static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
{
- unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
+ unsigned long rand_num;
u64 nonce;
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = get_random_u32_below(
+ READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1);
nonce = 0;
- if (idev->cnf.enhanced_dad ||
- dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
+ if (READ_ONCE(idev->cnf.enhanced_dad) ||
+ READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) {
do
get_random_bytes(&nonce, 6);
while (nonce == 0);
}
ifp->dad_nonce = nonce;
- ifp->dad_probes = idev->cnf.dad_transmits;
+ ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits);
addrconf_mod_dad_work(ifp, rand_num);
}
@@ -4059,8 +4108,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
net = dev_net(dev);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- (net->ipv6.devconf_all->accept_dad < 1 &&
- idev->cnf.accept_dad < 1) ||
+ (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 &&
+ READ_ONCE(idev->cnf.accept_dad) < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
bool send_na = false;
@@ -4152,8 +4201,8 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
- idev->cnf.accept_dad > 1) &&
+ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 ||
+ READ_ONCE(idev->cnf.accept_dad) > 1) &&
!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
struct in6_addr addr;
@@ -4164,7 +4213,7 @@ static void addrconf_dad_work(struct work_struct *w)
if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
ipv6_addr_equal(&ifp->addr, &addr)) {
/* DAD failed for link-local based on MAC */
- idev->cnf.disable_ipv6 = 1;
+ WRITE_ONCE(idev->cnf.disable_ipv6, 1);
pr_info("%s: IPv6 being disabled!\n",
ifp->idev->dev->name);
@@ -4278,7 +4327,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits != 0 &&
+ READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 &&
(dev->flags & IFF_LOOPBACK) == 0 &&
(dev->type != ARPHRD_TUNNEL) &&
!netif_is_team_port(dev);
@@ -4292,8 +4341,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
/* send unsolicited NA if enabled */
if (send_na &&
- (ifp->idev->cnf.ndisc_notify ||
- dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+ (READ_ONCE(ifp->idev->cnf.ndisc_notify) ||
+ READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
/*router=*/ !!ifp->idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
@@ -4313,7 +4362,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
write_lock_bh(&ifp->idev->lock);
spin_lock(&ifp->lock);
ifp->idev->rs_interval = rfc3315_s14_backoff_init(
- ifp->idev->cnf.rtr_solicit_interval);
+ READ_ONCE(ifp->idev->cnf.rtr_solicit_interval));
ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
@@ -4593,9 +4642,7 @@ restart:
!ifp->regen_count && ifp->ifpub) {
/* This is a non-regenerated temporary addr. */
- unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev);
if (age + regen_advance >= ifp->prefered_lft) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -4757,6 +4804,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
u32 prio;
@@ -4777,10 +4825,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
- if (!expires)
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- else
+ fib6_remove_gc_list(f6i);
+ } else {
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(f6i);
}
@@ -4843,13 +4899,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
IFA_F_NOPREFIXROUTE);
ifp->flags |= cfg->ifa_flags;
- ifp->tstamp = jiffies;
- ifp->valid_lft = cfg->valid_lft;
- ifp->prefered_lft = cfg->preferred_lft;
- ifp->ifa_proto = cfg->ifa_proto;
+ WRITE_ONCE(ifp->tstamp, jiffies);
+ WRITE_ONCE(ifp->valid_lft, cfg->valid_lft);
+ WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft);
+ WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto);
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
- ifp->rt_priority = cfg->rt_priority;
+ WRITE_ONCE(ifp->rt_priority, cfg->rt_priority);
if (new_peer)
ifp->peer_addr = *cfg->peer_pfx;
@@ -5070,17 +5126,21 @@ struct inet6_fill_args {
enum addr_type_t type;
};
-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+static int inet6_fill_ifaddr(struct sk_buff *skb,
+ const struct inet6_ifaddr *ifa,
struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
+ struct nlmsghdr *nlh;
u32 preferred, valid;
+ u32 flags, priority;
+ u8 proto;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ flags = READ_ONCE(ifa->flags);
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
@@ -5088,13 +5148,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
- spin_lock_bh(&ifa->lock);
- if (!((ifa->flags&IFA_F_PERMANENT) &&
- (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
- preferred = ifa->prefered_lft;
- valid = ifa->valid_lft;
+ preferred = READ_ONCE(ifa->prefered_lft);
+ valid = READ_ONCE(ifa->valid_lft);
+
+ if (!((flags & IFA_F_PERMANENT) &&
+ (preferred == INFINITY_LIFE_TIME))) {
if (preferred != INFINITY_LIFE_TIME) {
- long tval = (jiffies - ifa->tstamp)/HZ;
+ long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ;
+
if (preferred > tval)
preferred -= tval;
else
@@ -5110,28 +5171,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
- spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0)
goto error;
- } else
+ } else {
if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
goto error;
+ }
- if (ifa->rt_priority &&
- nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+ priority = READ_ONCE(ifa->rt_priority);
+ if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority))
goto error;
- if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp),
+ preferred, valid) < 0)
goto error;
- if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+ if (nla_put_u32(skb, IFA_FLAGS, flags) < 0)
goto error;
- if (ifa->ifa_proto &&
- nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+ proto = READ_ONCE(ifa->ifa_proto);
+ if (proto && nla_put_u8(skb, IFA_PROTO, proto))
goto error;
nlmsg_end(skb, nlh);
@@ -5142,12 +5204,13 @@ error:
return -EMSGSIZE;
}
-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+static int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
- u8 scope = RT_SCOPE_UNIVERSE;
int ifindex = ifmca->idev->dev->ifindex;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5165,7 +5228,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+ put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5175,13 +5238,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+static int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
- struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5199,7 +5263,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+ put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5210,24 +5274,23 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
}
/* called with rcu_read_lock() */
-static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
- struct netlink_callback *cb, int s_ip_idx,
+static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
struct inet6_fill_args *fillargs)
{
- struct ifmcaddr6 *ifmca;
- struct ifacaddr6 *ifaca;
+ const struct ifmcaddr6 *ifmca;
+ const struct ifacaddr6 *ifaca;
int ip_idx = 0;
- int err = 1;
+ int err = 0;
- read_lock_bh(&idev->lock);
switch (fillargs->type) {
case UNICAST_ADDR: {
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
fillargs->event = RTM_NEWADDR;
/* unicast address incl. temp addr */
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (ip_idx < s_ip_idx)
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
+ if (ip_idx < *s_ip_idx)
goto next;
err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
@@ -5239,27 +5302,25 @@ next:
break;
}
case MULTICAST_ADDR:
- read_unlock_bh(&idev->lock);
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rtnl_dereference(idev->mc_list);
+ for (ifmca = rcu_dereference(idev->mc_list);
ifmca;
- ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
- if (ip_idx < s_ip_idx)
+ ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
- read_lock_bh(&idev->lock);
break;
case ANYCAST_ADDR:
fillargs->event = RTM_GETANYCAST;
/* anycast address */
- for (ifaca = idev->ac_list; ifaca;
- ifaca = ifaca->aca_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
+ for (ifaca = rcu_dereference(idev->ac_list); ifaca;
+ ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
if (err < 0)
@@ -5269,8 +5330,7 @@ next:
default:
break;
}
- read_unlock_bh(&idev->lock);
- cb->args[2] = ip_idx;
+ *s_ip_idx = err ? ip_idx : 0;
return err;
}
@@ -5333,6 +5393,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ struct net *tgt_net = sock_net(skb->sk);
const struct nlmsghdr *nlh = cb->nlh;
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(cb->skb).portid,
@@ -5341,72 +5402,53 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *tgt_net = sock_net(skb->sk);
- int idx, s_idx, s_ip_idx;
- int h, s_h;
+ struct {
+ unsigned long ifindex;
+ int ip_idx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
int err = 0;
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- s_ip_idx = cb->args[2];
-
+ rcu_read_lock();
if (cb->strict_check) {
err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- goto put_tgt_net;
+ goto done;
err = 0;
if (fillargs.ifindex) {
- dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+ dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
if (!dev) {
err = -ENODEV;
- goto put_tgt_net;
+ goto done;
}
idev = __in6_dev_get(dev);
- if (idev) {
- err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+ if (idev)
+ err = in6_dump_addrs(idev, skb, cb,
+ &ctx->ip_idx,
&fillargs);
- if (err > 0)
- err = 0;
- }
- goto put_tgt_net;
+ goto done;
}
}
- rcu_read_lock();
cb->seq = inet6_base_seq(tgt_net);
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (h > s_h || idx > s_idx)
- s_ip_idx = 0;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
- &fillargs) < 0)
- goto done;
-cont:
- idx++;
- }
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ if (err < 0)
+ goto done;
}
done:
rcu_read_unlock();
- cb->args[0] = h;
- cb->args[1] = idx;
-put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return skb->len ? : err;
+ return err;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -5579,87 +5621,97 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
- __s32 *array, int bytes)
+static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
+ __s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
memset(array, 0, bytes);
- array[DEVCONF_FORWARDING] = cnf->forwarding;
- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
- array[DEVCONF_MTU6] = cnf->mtu6;
- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
- array[DEVCONF_AUTOCONF] = cnf->autoconf;
- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+ array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding);
+ array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit);
+ array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6);
+ array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra);
+ array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects);
+ array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf);
+ array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits);
+ array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits);
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval));
array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_max_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval));
array[DEVCONF_RTR_SOLICIT_DELAY] =
- jiffies_to_msecs(cnf->rtr_solicit_delay);
- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay));
+ array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version);
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval));
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
- array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
- array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+ jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval));
+ array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr);
+ array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft);
+ array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft);
+ array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry);
+ array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor);
+ array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses);
+ array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr);
+ array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric);
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] =
+ READ_ONCE(cnf->accept_ra_min_hop_limit);
+ array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo);
#ifdef CONFIG_IPV6_ROUTER_PREF
- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+ array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref);
array[DEVCONF_RTR_PROBE_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_probe_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval));
#ifdef CONFIG_IPV6_ROUTE_INFO
- array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_min_plen);
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_max_plen);
#endif
#endif
- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+ array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp);
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] =
+ READ_ONCE(cnf->accept_source_route);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
- array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
+ array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad);
+ array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic);
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
- array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
- array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
- array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
- array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
- array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
- array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
- array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
- array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
+ array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6);
+ array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad);
+ array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao);
+ array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify);
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] =
+ READ_ONCE(cnf->suppress_frag_ndisc);
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] =
+ READ_ONCE(cnf->accept_ra_from_local);
+ array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu);
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] =
+ READ_ONCE(cnf->ignore_routes_with_linkdown);
/* we omit DEVCONF_STABLE_SECRET for now */
- array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
- array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
- array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
- array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
- array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only);
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] =
+ READ_ONCE(cnf->drop_unicast_in_l2_multicast);
+ array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na);
+ array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down);
+ array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled);
#ifdef CONFIG_IPV6_SEG6_HMAC
- array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+ array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac);
#endif
- array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
- array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
- array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
- array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
- array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
- array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
- array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
- array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
- array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
- array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
- array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
+ array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad);
+ array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode);
+ array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy);
+ array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass);
+ array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled);
+ array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled);
+ array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id);
+ array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide);
+ array[DEVCONF_NDISC_EVICT_NOCARRIER] =
+ READ_ONCE(cnf->ndisc_evict_nocarrier);
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] =
+ READ_ONCE(cnf->accept_untracked_na);
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
}
static inline size_t inet6_ifla6_size(void)
@@ -5739,13 +5791,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
- struct nlattr *nla;
struct ifla_cacheinfo ci;
+ struct nlattr *nla;
+ u32 ra_mtu;
- if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+ if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags)))
goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp));
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
@@ -5777,11 +5830,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE,
+ READ_ONCE(idev->cnf.addr_gen_mode)))
goto nla_put_failure;
- if (idev->ra_mtu &&
- nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ ra_mtu = READ_ONCE(idev->ra_mtu);
+ if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu))
goto nla_put_failure;
return 0;
@@ -5843,7 +5897,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
return -EINVAL;
}
- if (idev->cnf.rtr_solicits == 0) {
+ if (READ_ONCE(idev->cnf.rtr_solicits) == 0) {
NL_SET_ERR_MSG(extack,
"Router solicitation is disabled on device");
return -EINVAL;
@@ -5876,7 +5930,7 @@ update_lft:
if (update_rs) {
idev->if_flags |= IF_RS_SENT;
idev->rs_interval = rfc3315_s14_backoff_init(
- idev->cnf.rtr_solicit_interval);
+ READ_ONCE(idev->cnf.rtr_solicit_interval));
idev->rs_probes = 1;
addrconf_mod_rs_timer(idev, idev->rs_interval);
}
@@ -5982,7 +6036,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
- idev->cnf.addr_gen_mode = mode;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, mode);
}
return 0;
@@ -5994,6 +6048,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
+ int ifindex, iflink;
void *protoinfo;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
@@ -6004,18 +6059,20 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_family = AF_INET6;
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
- hdr->ifi_index = dev->ifindex;
+ ifindex = READ_ONCE(dev->ifindex);
+ hdr->ifi_index = ifindex;
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
+ iflink = dev_get_iflink(dev);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ (ifindex != iflink &&
+ nla_put_u32(skb, IFLA_LINK, iflink)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
+ netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
if (!protoinfo)
@@ -6061,50 +6118,39 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err;
/* only requests using strict checking can pass data to
* influence the dump
*/
if (cb->strict_check) {
- int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+ err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
if (err < 0)
return err;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
+ err = 0;
rcu_read_lock();
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
- if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWLINK, NLM_F_MULTI) < 0)
- goto out;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI);
+ if (err < 0)
+ break;
}
-out:
rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
@@ -6325,7 +6371,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
- idev->cnf.disable_ipv6 = newf;
+
+ WRITE_ONCE(idev->cnf.disable_ipv6, newf);
if (changed)
dev_disable_change(idev);
}
@@ -6334,23 +6381,22 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
+ if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
+ WRITE_ONCE(*p, newf);
+ return 0;
+ }
+
if (!rtnl_trylock())
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
- *p = newf;
-
- if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
- rtnl_unlock();
- return 0;
- }
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_all->disable_ipv6) {
- net->ipv6.devconf_dflt->disable_ipv6 = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf);
addrconf_disable_change(net, newf);
} else if ((!newf) ^ (!old))
dev_disable_change((struct inet6_dev *)table->extra1);
@@ -6461,24 +6507,25 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
}
if (idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
- net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+ WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val);
for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ new_val);
addrconf_init_auto_addrs(idev->dev);
}
}
}
- *((u32 *)ctl->data) = new_val;
+ WRITE_ONCE(*((u32 *)ctl->data), new_val);
}
out:
@@ -6537,14 +6584,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
struct inet6_dev *idev = __in6_dev_get(dev);
if (idev) {
- idev->cnf.addr_gen_mode =
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
}
} else {
struct inet6_dev *idev = ctl->extra1;
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
out:
@@ -6624,20 +6672,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
static
int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
{
+ struct net *net = (struct net *)ctl->extra2;
struct inet6_dev *idev;
- struct net *net;
-
- if (!rtnl_trylock())
- return restart_syscall();
-
- *valp = val;
- net = (struct net *)ctl->extra2;
if (valp == &net->ipv6.devconf_dflt->disable_policy) {
- rtnl_unlock();
+ WRITE_ONCE(*valp, val);
return 0;
}
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ WRITE_ONCE(*valp, val);
+
if (valp == &net->ipv6.devconf_all->disable_policy) {
struct net_device *dev;
@@ -6807,6 +6854,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "regen_min_advance",
+ .data = &ipv6_devconf.regen_min_advance,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "regen_max_retry",
.data = &ipv6_devconf.regen_max_retry,
.maxlen = sizeof(int),
@@ -7131,14 +7185,12 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- {
- /* sentinel */
- }
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
+ size_t table_size = ARRAY_SIZE(addrconf_sysctl);
int i, ifindex;
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
@@ -7147,7 +7199,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (!table)
goto out;
- for (i = 0; table[i].data; i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data += (char *)p - (char *)&ipv6_devconf;
/* If one of these is already set, then it is not safe to
* overwrite either of them: this makes proc_dointvec_minmax
@@ -7162,7 +7214,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
p->sysctl_header = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(addrconf_sysctl));
+ table_size);
if (!p->sysctl_header)
goto free;
@@ -7185,7 +7237,7 @@ out:
static void __addrconf_sysctl_unregister(struct net *net,
struct ipv6_devconf *p, int ifindex)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!p->sysctl_header)
return;
@@ -7366,7 +7418,8 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
- addrconf_wq = create_workqueue("ipv6_addrconf");
+ /* All works using addrconf_wq need to lock rtnl. */
+ addrconf_wq = create_singlethread_workqueue("ipv6_addrconf");
if (!addrconf_wq) {
err = -ENOMEM;
goto out_nowq;
@@ -7389,7 +7442,7 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, 0);
+ NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
@@ -7403,21 +7456,25 @@ int __init addrconf_init(void)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr, 0);
+ NULL, inet6_dump_ifmcaddr,
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr, 0);
+ NULL, inet6_dump_ifacaddr,
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
inet6_netconf_get_devconf,
inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
err = ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 17ac45aa71..acd70b5992 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- net->ipv6.ip6addrlbl_table.seq++;
+ WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq + 1);
return ret;
}
@@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
};
static int ip6addrlbl_fill(struct sk_buff *skb,
- struct ip6addrlbl_entry *p,
+ const struct ip6addrlbl_entry *p,
u32 lseq,
u32 portid, u32 seq, int event,
unsigned int flags)
@@ -498,7 +499,8 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
int idx = 0, s_idx = cb->args[0];
- int err;
+ int err = 0;
+ u32 lseq;
if (cb->strict_check) {
err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
@@ -507,10 +509,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_lock();
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- net->ipv6.ip6addrlbl_table.seq,
+ lseq,
NETLINK_CB(cb->skb).portid,
nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -522,7 +525,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline int ip6addrlbl_msgsize(void)
@@ -614,7 +617,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- lseq = net->ipv6.ip6addrlbl_table.seq;
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
if (p)
err = ip6addrlbl_fill(skb, p, lseq,
NETLINK_CB(in_skb).portid,
@@ -647,6 +650,7 @@ int __init ipv6_addr_label_rtnl_register(void)
return ret;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
+ RTNL_FLAG_DUMP_UNLOCKED);
return ret;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 959bfd9f63..8041dc181b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -64,6 +64,7 @@
#include <net/xfrm.h>
#include <net/ioam6.h>
#include <net/rawv6.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -736,7 +737,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet6_recvmsg, /* retpoline's sake */
.read_skb = udp_read_skb,
.mmap = sock_no_mmap,
- .set_peek_off = sk_set_peek_off,
+ .set_peek_off = udp_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bb17f484ee..0627c4c18d 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -252,9 +252,8 @@ static void aca_free_rcu(struct rcu_head *h)
static void aca_put(struct ifacaddr6 *ac)
{
- if (refcount_dec_and_test(&ac->aca_refcnt)) {
- call_rcu(&ac->rcu, aca_free_rcu);
- }
+ if (refcount_dec_and_test(&ac->aca_refcnt))
+ call_rcu_hurry(&ac->rcu, aca_free_rcu);
}
static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
@@ -296,7 +295,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr)) {
aca->aca_users++;
err = 0;
@@ -317,13 +317,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- aca->aca_next = idev->ac_list;
- idev->ac_list = aca;
-
/* Hold this for addrconf_join_solict() below before we unlock,
* it is already exposed via idev->ac_list.
*/
aca_get(aca);
+ aca->aca_next = idev->ac_list;
+ rcu_assign_pointer(idev->ac_list, aca);
+
write_unlock_bh(&idev->lock);
ipv6_add_acaddr_hash(net, aca);
@@ -350,7 +350,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_lock_bh(&idev->lock);
prev_aca = NULL;
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
prev_aca = aca;
@@ -364,9 +365,9 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
if (prev_aca)
- prev_aca->aca_next = aca->aca_next;
+ rcu_assign_pointer(prev_aca->aca_next, aca->aca_next);
else
- idev->ac_list = aca->aca_next;
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -392,8 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
struct ifacaddr6 *aca;
write_lock_bh(&idev->lock);
- while ((aca = idev->ac_list) != NULL) {
- idev->ac_list = aca->aca_next;
+ while ((aca = rtnl_dereference(idev->ac_list)) != NULL) {
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
@@ -420,11 +421,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- for (aca = idev->ac_list; aca; aca = aca->aca_next)
+ for (aca = rcu_dereference(idev->ac_list); aca;
+ aca = rcu_dereference(aca->aca_next))
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
- read_unlock_bh(&idev->lock);
return aca != NULL;
}
return false;
@@ -477,30 +477,25 @@ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
struct ac6_iter_state {
struct seq_net_private p;
struct net_device *dev;
- struct inet6_dev *idev;
};
#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
{
- struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
struct net *net = seq_file_net(seq);
+ struct ifacaddr6 *im = NULL;
- state->idev = NULL;
for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
+
idev = __in6_dev_get(state->dev);
if (!idev)
continue;
- read_lock_bh(&idev->lock);
- im = idev->ac_list;
- if (im) {
- state->idev = idev;
+ im = rcu_dereference(idev->ac_list);
+ if (im)
break;
- }
- read_unlock_bh(&idev->lock);
}
return im;
}
@@ -508,22 +503,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
{
struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct inet6_dev *idev;
- im = im->aca_next;
+ im = rcu_dereference(im->aca_next);
while (!im) {
- if (likely(state->idev != NULL))
- read_unlock_bh(&state->idev->lock);
-
state->dev = next_net_device_rcu(state->dev);
- if (!state->dev) {
- state->idev = NULL;
+ if (!state->dev)
break;
- }
- state->idev = __in6_dev_get(state->dev);
- if (!state->idev)
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
continue;
- read_lock_bh(&state->idev->lock);
- im = state->idev->ac_list;
+ im = rcu_dereference(idev->ac_list);
}
return im;
}
@@ -555,12 +545,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ac6_seq_stop(struct seq_file *seq, void *v)
__releases(RCU)
{
- struct ac6_iter_state *state = ac6_seq_private(seq);
-
- if (likely(state->idev != NULL)) {
- read_unlock_bh(&state->idev->lock);
- state->idev = NULL;
- }
rcu_read_unlock();
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1578ed9e97..eb8ee1e937 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -657,11 +657,8 @@ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def,
net_clen_bits,
spot + 1,
1);
- if (spot < 0) {
- if (spot == -2)
- return -EFAULT;
+ if (spot < 0)
return 0;
- }
ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat,
spot,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7371886d4f..3920e8aa10 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
#include <net/tcp.h>
#include <net/espintcp.h>
#include <net/inet6_hashtables.h>
+#include <linux/skbuff_ref.h>
#include <linux/highmem.h>
@@ -131,7 +132,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(skb, sg_page(sg), false);
+ skb_page_unref(sg_page(sg), skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
@@ -255,8 +256,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
@@ -383,7 +383,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -398,12 +397,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -459,7 +452,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -822,7 +814,6 @@ int esp6_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1232,9 +1223,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET6_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddb..919ebfabbe 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -83,6 +83,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 02e9ffb63a..6789623b2b 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,6 +50,7 @@
#endif
#include <net/rpl.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <net/ioam6.h>
#include <net/dst_metadata.h>
@@ -378,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
- accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
- if (accept_seg6 > idev->cnf.seg6_enabled)
- accept_seg6 = idev->cnf.seg6_enabled;
+ accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled),
+ READ_ONCE(idev->cnf.seg6_enabled));
if (!accept_seg6) {
kfree_skb(skb);
@@ -654,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
struct net *net = dev_net(skb->dev);
- int accept_source_route = net->ipv6.devconf_all->accept_source_route;
+ int accept_source_route;
- if (idev && accept_source_route > idev->cnf.accept_source_route)
- accept_source_route = idev->cnf.accept_source_route;
+ accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route);
+
+ if (idev)
+ accept_source_route = min(accept_source_route,
+ READ_ONCE(idev->cnf.accept_source_route));
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -801,7 +804,7 @@ looped_back:
ip6_route_input(skb);
if (skb_dst(skb)->error) {
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
}
@@ -818,7 +821,7 @@ looped_back:
goto looped_back;
}
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
@@ -880,14 +883,6 @@ void ipv6_exthdrs_exit(void)
Hop-by-hop options.
**********************************/
-/*
- * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
- */
-static inline struct net *ipv6_skb_net(struct sk_buff *skb)
-{
- return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
-}
-
/* Router Alert as of RFC 2711 */
static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
@@ -918,7 +913,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if IOAM is not enabled on ingress */
- if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled))
goto ignore;
/* Truncated Option header */
@@ -938,7 +933,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if the IOAM namespace is unknown */
- ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id);
if (!ns)
goto ignore;
@@ -954,6 +949,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+ optoff + sizeof(*hdr));
ioam6_fill_trace_data(skb, ns, trace, true);
+
+ ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev),
+ GFP_ATOMIC, (void *)trace, hdr->opt_len - 2);
break;
default:
break;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 52c04f0ac4..9e254de746 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
+ struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
+
+ if (!idev)
+ goto again;
err = fib6_rule_saddr(net, rule, flags, flp6,
- ip6_dst_idev(&rt->dst)->dev);
+ idev->dev);
if (err == -EAGAIN)
goto again;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1635da0728..7b31674644 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = true;
} else {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
int tmo = net->ipv6.sysctl.icmpv6_time;
struct inet_peer *peer;
@@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
dst = ip6_route_output(net, sk, fl6);
if (!dst->error) {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct in6_addr prefsrc;
rt6_get_prefsrc(rt, &prefsrc);
@@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, &fl6, dst_rt6_info(dst),
MSG_DONTWAIT)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT)) {
+ dst_rt6_info(dst), MSG_DONTWAIT)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
@@ -1206,7 +1206,6 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { },
};
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 8c1ce78956..ff7e734e33 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel(
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
- struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct rt6_info *rt = dst_rt6_info(orig_dst);
struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
struct dst_entry *dst;
int err = -EINVAL;
@@ -58,7 +58,9 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return orig_dst->lwtstate->orig_output(net, sk, skb);
}
+ local_bh_disable();
dst = dst_cache_get(&ilwt->dst_cache);
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -70,7 +72,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = orig_dst->dev->ifindex;
fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
&ip6h->daddr);
dst = ip6_route_output(net, NULL, &fl6);
@@ -86,8 +88,11 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (ilwt->connected)
+ if (ilwt->connected) {
+ local_bh_disable();
dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
}
skb_dst_set(skb, dst);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b0e8d278e8..6db71bb1cd 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -14,27 +14,26 @@
#include <linux/random.h>
#include <net/addrconf.h>
+#include <net/hotdata.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/sock_reuseport.h>
+#include <net/tcp.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
- static u32 inet6_ehash_secret __read_mostly;
- static u32 ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
- net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+ net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret));
lhash = (__force u32)laddr->s6_addr32[3];
- fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+ fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
@@ -291,7 +290,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 571f0e4d9c..08c9295130 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -612,6 +612,68 @@ static const struct genl_ops ioam6_genl_ops[] = {
},
};
+#define IOAM6_GENL_EV_GRP_OFFSET 0
+
+static const struct genl_multicast_group ioam6_mcgrps[] = {
+ [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME,
+ .flags = GENL_MCAST_CAP_NET_ADMIN },
+};
+
+static int ioam6_event_put_trace(struct sk_buff *skb,
+ struct ioam6_trace_hdr *trace,
+ unsigned int len)
+{
+ if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE,
+ be16_to_cpu(trace->namespace_id)) ||
+ nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) ||
+ nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE,
+ be32_to_cpu(trace->type_be32)) ||
+ nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA,
+ len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4,
+ trace->data + trace->remlen * 4))
+ return 1;
+
+ return 0;
+}
+
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&ioam6_genl_family, net,
+ IOAM6_GENL_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type);
+ if (!nlh)
+ goto nla_put_failure;
+
+ switch (type) {
+ case IOAM6_EVENT_UNSPEC:
+ WARN_ON_ONCE(1);
+ break;
+ case IOAM6_EVENT_TRACE:
+ if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt,
+ opt_len))
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(skb, nlh);
+ genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0,
+ IOAM6_GENL_EV_GRP_OFFSET, gfp);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+}
+
static struct genl_family ioam6_genl_family __ro_after_init = {
.name = IOAM6_GENL_NAME,
.version = IOAM6_GENL_VERSION,
@@ -620,6 +682,8 @@ static struct genl_family ioam6_genl_family __ro_after_init = {
.ops = ioam6_genl_ops,
.n_ops = ARRAY_SIZE(ioam6_genl_ops),
.resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1,
+ .mcgrps = ioam6_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps),
.module = THIS_MODULE,
};
@@ -663,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -671,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -758,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
@@ -766,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 7563f8c6aa..bf7120ecea 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -351,9 +351,9 @@ do_encap:
goto drop;
if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&ilwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -373,9 +373,9 @@ do_encap:
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8184076a39..83e4f9855a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -617,23 +620,25 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
- .filter.dump_routes = true };
+ struct rt6_rtnl_dump_arg arg = {
+ .filter.dump_exceptions = true,
+ .filter.dump_routes = true,
+ .filter.rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
unsigned int e = 0, s_e;
+ struct hlist_head *head;
struct fib6_walker *w;
struct fib6_table *tb;
- struct hlist_head *head;
- int res = 0;
+ unsigned int h, s_h;
+ int err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
- int err;
-
err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -648,8 +653,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return -ENOMEM;
+ if (!w) {
+ err = -ENOMEM;
+ goto unlock;
+ }
w->func = fib6_dump_node;
cb->args[2] = (long)w;
@@ -669,46 +676,46 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET6)
- goto out;
+ goto unlock;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
if (!cb->args[0]) {
- res = fib6_dump_table(tb, skb, cb);
- if (!res)
+ err = fib6_dump_table(tb, skb, cb);
+ if (!err)
cb->args[0] = 1;
}
- goto out;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
- res = fib6_dump_table(tb, skb, cb);
- if (res != 0)
- goto out_unlock;
+ err = fib6_dump_table(tb, skb, cb);
+ if (err != 0)
+ goto out;
next:
e++;
}
}
-out_unlock:
- rcu_read_unlock();
+out:
cb->args[1] = e;
cb->args[0] = h;
-out:
- res = res < 0 ? res : skb->len;
- if (res <= 0)
+
+unlock:
+ rcu_read_unlock();
+ if (err <= 0)
fib6_dump_end(cb);
- return res;
+ return err;
}
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
@@ -751,8 +758,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
int bit;
__be32 dir = 0;
- RT6_TRACE("fib6_add_1\n");
-
/* insert node in tree */
fn = root;
@@ -961,6 +966,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (!fib6_nh->rt6i_pcpu)
return;
+ rcu_read_lock();
/* release the reference to this fib entry from
* all of its cached pcpu routes
*/
@@ -969,7 +975,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
+
+ /* Paired with xchg() in rt6_get_pcpu_route() */
+ pcpu_rt = READ_ONCE(*ppcpu_rt);
/* only dropping the 'from' reference if the cached route
* is using 'match'. The cached pcpu_rt->from only changes
@@ -983,6 +991,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
fib6_info_release(from);
}
}
+ rcu_read_unlock();
}
struct fib6_nh_pcpu_arg {
@@ -1057,6 +1066,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
}
/*
@@ -1117,10 +1129,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
rt->fib6_nsiblings = 0;
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->fib6_flags & RTF_EXPIRES))
+ if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- else
+ fib6_remove_gc_list(iter);
+ } else {
fib6_set_expires(iter, rt->expires);
+ fib6_add_gc_list(iter);
+ }
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1482,6 +1497,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (rt->fib6_flags & RTF_EXPIRES)
+ fib6_add_gc_list(rt);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -1806,7 +1825,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_info *new_fn_leaf;
- RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
@@ -1869,7 +1888,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
FOR_WALKERS(net, w) {
if (!child) {
if (w->node == fn) {
- RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
+ w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
@@ -1877,10 +1897,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (w->node == fn) {
w->node = child;
if (children&2) {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
@@ -1908,8 +1930,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct net *net = info->nl_net;
bool notify_del = false;
- RT6_TRACE("fib6_del_route\n");
-
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
@@ -1958,7 +1978,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
- RT6_TRACE("walker %p adjusted by delroute\n", w);
+ pr_debug("walker %p adjusted by delroute\n", w);
w->leaf = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
@@ -2284,9 +2304,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2296,7 +2315,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
- RT6_TRACE("expiring %p\n", rt);
+ pr_debug("expiring %p\n", rt);
return -1;
}
gc_args->more++;
@@ -2311,6 +2330,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+
+ fib6_gc_table(net, table, gc_args);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2326,7 +2381,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -2386,6 +2441,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2398,6 +2454,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
fib6_tables_init(net);
@@ -2447,10 +2504,8 @@ int __init fib6_init(void)
{
int ret = -ENOMEM;
- fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
+ fib6_node_kmem = KMEM_CACHE(fib6_node,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
if (!fib6_node_kmem)
goto out;
@@ -2459,7 +2514,8 @@ int __init fib6_init(void)
goto out_kmem_cache_create;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, 0);
+ inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
+ RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 289b83347d..3942bd2ade 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -496,11 +496,11 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
tpi->proto);
if (tunnel) {
if (tunnel->parms.collect_md) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
__be64 tun_id;
- __be16 flags;
- flags = tpi->flags;
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
@@ -551,14 +551,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (tunnel->parms.collect_md) {
struct erspan_metadata *pkt_md, *md;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
@@ -580,7 +580,8 @@ static int ip6erspan_rcv(struct sk_buff *skb,
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE);
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
@@ -748,8 +749,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 protocol;
- __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -781,8 +782,11 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- flags = key->tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ ip_tunnel_flags_zero(flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, flags, key->tun_flags);
tun_hlen = gre_calc_hlen(flags);
if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
@@ -791,19 +795,21 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
} else {
if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
return -ENOMEM;
- flags = tunnel->parms.o_flags;
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -825,7 +831,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
&dsfield, &encap_limit);
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return -1;
@@ -859,7 +866,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags)))
return -1;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
@@ -886,7 +894,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return err;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol);
@@ -939,6 +948,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info = NULL;
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
bool truncate = false;
int encap_limit = -1;
__u8 dsfield = false;
@@ -982,7 +992,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
- t->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
IPCB(skb)->flags = 0;
/* For collect_md mode, derive fl6 from the tunnel key,
@@ -1007,7 +1017,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_info->key.tun_flags))
goto tx_err;
if (tun_info->options_len < sizeof(*md))
goto tx_err;
@@ -1068,7 +1079,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
/* Push GRE header. */
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1211,8 +1224,8 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
t->parms.proto = p->proto;
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
t->parms.fwmark = p->fwmark;
t->parms.erspan_ver = p->erspan_ver;
t->parms.index = p->index;
@@ -1241,8 +1254,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
p->link = u->link;
p->i_key = u->i_key;
p->o_key = u->o_key;
- p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, u->i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, u->o_flags);
memcpy(p->name, u->name, sizeof(u->name));
}
@@ -1394,7 +1407,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ipv6h->daddr = t->parms.raddr;
p = (__be16 *)(ipv6h + 1);
- p[0] = t->parms.o_flags;
+ p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags);
p[1] = htons(type);
/*
@@ -1421,7 +1434,6 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1431,7 +1443,6 @@ static void ip6gre_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static void ip6gre_tunnel_setup(struct net_device *dev)
@@ -1440,6 +1451,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_IP6GRE;
dev->flags |= IFF_NOARP;
@@ -1458,19 +1470,17 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- __be16 flags;
dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE6_FEATURES;
- flags = nt->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) &&
+ nt->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1489,13 +1499,9 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1519,9 +1525,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1636,21 +1639,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6gre_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
+ .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1797,12 +1798,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1855,7 +1856,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1884,13 +1884,9 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1912,9 +1908,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1925,7 +1918,6 @@ static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1939,6 +1931,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2149,11 +2142,13 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (p->erspan_ver == 1 || p->erspan_ver == 2) {
if (!p->collect_md)
- o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
goto nla_put_failure;
@@ -2239,6 +2234,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2410,7 +2406,7 @@ static void __exit ip6gre_fini(void)
module_init(ip6gre_init);
module_exit(ip6gre_fini);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_RTNL_LINK("ip6gretap");
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index b837881453..133610a49d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
- !idev || unlikely(idev->cnf.disable_ipv6)) {
+ !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- if (idev && unlikely(idev->cnf.disable_ipv6))
+ if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6)))
SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast) {
+ READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) {
SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cca64c7809..9822163428 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -67,7 +67,7 @@ static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
off += len;
}
- skb_gro_pull(skb, off - skb_network_offset(skb));
+ skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb));
return proto;
}
@@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if (unlikely(!iph))
goto out;
- skb_set_network_header(skb, off);
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
@@ -259,7 +259,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
NAPI_GRO_CB(skb)->proto = proto;
flush--;
- nlen = skb_network_header_len(skb);
+ nlen = skb_gro_offset(skb) - off;
list_for_each_entry(p, head, list) {
const struct ipv6hdr *iph2;
@@ -290,19 +290,8 @@ not_same_flow:
nlen - sizeof(struct ipv6hdr)))
goto not_same_flow;
}
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
- (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = 0;
}
- NAPI_GRO_CB(skb)->is_atomic = true;
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_postpull_rcsum(skb, iph, nlen);
@@ -419,14 +408,6 @@ static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
return inet_gro_complete(skb, nhoff);
}
-static struct packet_offload ipv6_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IPV6),
- .callbacks = {
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
- },
-};
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -486,7 +467,15 @@ static int __init ipv6_offload_init(void)
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
- dev_add_offload(&ipv6_packet_offload);
+ net_hotdata.ipv6_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ipv6_packet_offload);
inet_add_offload(&sit_offload, IPPROTO_IPV6);
inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31b86fe661..784424ac41 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
rcu_read_lock();
- nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(IS_ERR_OR_NULL(neigh))) {
@@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (unlikely(idev->cnf.disable_ipv6)) {
+ if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
@@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb)
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
goto error;
if (skb->pkt_type != PACKET_HOST)
@@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!net->ipv6.devconf_all->disable_policy &&
- (!idev || !idev->cnf.disable_policy) &&
+ if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
+ (!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb)
}
/* XXX: idev->cnf.proxy_ndp? */
- if (net->ipv6.devconf_all->proxy_ndp &&
+ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
@@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb)
* send a redirect.
*/
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY)
target = &rt->rt6i_gateway;
else
@@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
bool mono_delivery_time = skb->mono_delivery_time;
@@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
return NULL;
}
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -1118,12 +1118,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
*dst = ip6_route_output(net, sk, fl6);
- rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
+ rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
+ fl6->flowi6_l3mdev,
&fl6->saddr);
rcu_read_unlock();
@@ -1159,7 +1160,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- rt = (struct rt6_info *) *dst;
+ rt = dst_rt6_info(*dst);
rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
@@ -1423,7 +1424,7 @@ static int __ip6_append_data(struct sock *sk,
int offset = 0;
bool zc = false;
u32 tskey = 0;
- struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ struct rt6_info *rt = dst_rt6_info(cork->dst);
bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
@@ -1877,7 +1878,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
- struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
+ struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
@@ -1933,7 +1934,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
u8 icmp6_type;
if (sk->sk_socket->type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
@@ -1949,7 +1950,7 @@ out:
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
err = ip6_local_out(net, skb->sk, skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 70478027a7..9dee0c1279 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -247,7 +247,6 @@ static void ip6_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static int ip6_tnl_create2(struct net_device *dev)
@@ -799,17 +798,15 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
int nh, err;
- if ((!(tpi->flags & TUNNEL_CSUM) &&
- (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
- ((tpi->flags & TUNNEL_CSUM) &&
- !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags & TUNNEL_SEQ) {
- if (!(tpi->flags & TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno &&
(s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
@@ -947,7 +944,9 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (t->parms.collect_md) {
- tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
goto drop;
}
@@ -1747,7 +1746,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);
@@ -1756,7 +1755,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- return t->parms.link;
+ return READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
@@ -1848,6 +1847,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->features |= NETIF_F_LLTX;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
dev->features |= IPXIPX_FEATURES;
@@ -1873,13 +1873,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret)
- goto free_stats;
+ return ret;
ret = gro_cells_init(&t->gro_cells, dev);
if (ret)
@@ -1903,9 +1900,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
destroy_dst:
dst_cache_destroy(&t->dst_cache);
-free_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -2152,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);
@@ -2283,21 +2277,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6_tnl_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
+ .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 1163ca6ea4..590737c275 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -174,11 +174,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
}
}
-static void vti6_dev_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
static int vti6_tnl_create2(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
@@ -671,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
dev->flags &= ~IFF_POINTOPOINT;
if (keep_mtu && dev->mtu) {
- dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+ WRITE_ONCE(dev->mtu,
+ clamp(dev->mtu, dev->min_mtu, dev->max_mtu));
return;
}
@@ -892,7 +888,6 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_siocdevprivate = vti6_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -908,8 +903,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->netdev_ops = &vti6_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = vti6_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_TUNNEL6;
dev->min_mtu = IPV4_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
@@ -931,9 +926,6 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
return 0;
@@ -1175,24 +1167,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
+static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct vti6_net *ip6n;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, &list);
+ vti6_destroy_tunnels(ip6n, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch = vti6_exit_batch_net,
+ .exit_batch_rtnl = vti6_exit_batch_rtnl,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9782c180fe..dd342e6ecf 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1373,10 +1373,7 @@ int __init ip6_mr_init(void)
{
int err;
- mrt_cachep = kmem_cache_create("ip6_mrt_cache",
- sizeof(struct mfc6_cache),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
if (!mrt_cachep)
return -ENOMEM;
@@ -2276,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
int err;
struct mr_table *mrt;
struct mfc6_cache *cache;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -2595,7 +2592,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 56c3c467f9..d4c28ec1bc 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -948,6 +948,8 @@ done:
if (optlen < sizeof(int))
goto e_inval;
retv = ip6_ra_control(sk, val);
+ if (retv == 0)
+ inet6_assign_bit(RTALERT, sk, valbool);
break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
@@ -1346,7 +1348,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
if (val < 0)
- val = sock_net(sk)->ipv6.devconf_all->hop_limit;
+ val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit);
break;
}
@@ -1445,6 +1447,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rxopt.bits.recvfragsize;
break;
+ case IPV6_ROUTER_ALERT:
+ val = inet6_test_bit(RTALERT, sk);
+ break;
+
case IPV6_ROUTER_ALERT_ISOLATE:
val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 76ee1615ff..7ba01d8cfb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
int iv;
if (mld_in_v1_mode(idev))
- iv = idev->cnf.mldv1_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval);
else
- iv = idev->cnf.mldv2_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval);
return iv > 0 ? iv : 1;
}
@@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
static int mld_force_mld_version(const struct inet6_dev *idev)
{
+ const struct net *net = dev_net(idev->dev);
+ int all_force;
+
+ all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version);
/* Normally, both are 0 here. If enforcement to a particular is
* being used, individual device enforcement will have a lower
* precedence over 'all' device (.../conf/all/force_mld_version).
*/
-
- if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
- return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
- else
- return idev->cnf.force_mld_version;
+ return all_force ?: READ_ONCE(idev->cnf.force_mld_version);
}
static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a19999b30b..0282d15725 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
return NULL;
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
+ bool unknown = false;
int l;
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
@@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
break;
#endif
default:
- if (ndisc_is_useropt(dev, nd_opt)) {
- ndopts->nd_useropts_end = nd_opt;
- if (!ndopts->nd_useropts)
- ndopts->nd_useropts = nd_opt;
- } else {
- /*
- * Unknown options must be silently ignored,
- * to accommodate future extension to the
- * protocol.
- */
- ND_PRINTK(2, notice,
- "%s: ignored unsupported option; type=%d, len=%d\n",
- __func__,
- nd_opt->nd_opt_type,
- nd_opt->nd_opt_len);
- }
+ unknown = true;
+ }
+ if (ndisc_is_useropt(dev, nd_opt)) {
+ ndopts->nd_useropts_end = nd_opt;
+ if (!ndopts->nd_useropts)
+ ndopts->nd_useropts = nd_opt;
+ } else if (unknown) {
+ /*
+ * Unknown options must be silently ignored,
+ * to accommodate future extension to the
+ * protocol.
+ */
+ ND_PRINTK(2, notice,
+ "%s: ignored unsupported option; type=%d, len=%d\n",
+ __func__,
+ nd_opt->nd_opt_type,
+ nd_opt->nd_opt_len);
}
next_opt:
opt_len -= l;
@@ -451,7 +453,7 @@ static void ip6_nd_hdr(struct sk_buff *skb,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
@@ -535,7 +537,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
override = false;
- inc_opt |= ifp->idev->cnf.force_tllao;
+ inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
in6_ifa_put(ifp);
} else {
if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -903,8 +905,9 @@ have_ifp:
}
if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
- (idev->cnf.forwarding &&
- (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
+ (READ_ONCE(idev->cnf.forwarding) &&
+ (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
+ READ_ONCE(idev->cnf.proxy_ndp)) &&
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
@@ -929,7 +932,7 @@ have_ifp:
}
if (is_router < 0)
- is_router = idev->cnf.forwarding;
+ is_router = READ_ONCE(idev->cnf.forwarding);
if (dad) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
@@ -973,7 +976,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
- switch (idev->cnf.accept_untracked_na) {
+ switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
case 0: /* Don't accept untracked na (absent in neighbor cache) */
return 0;
case 1: /* Create new entries from na if currently untracked */
@@ -1024,7 +1027,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* drop_unsolicited_na takes precedence over accept_untracked_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
- idev->cnf.drop_unsolicited_na)
+ READ_ONCE(idev->cnf.drop_unsolicited_na))
return reason;
if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
@@ -1080,7 +1083,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* Note that we don't do a (daddr == all-routers-mcast) check.
*/
new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
- if (!neigh && lladdr && idev && idev->cnf.forwarding) {
+ if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
if (accept_untracked_na(dev, saddr)) {
neigh = neigh_create(&nd_tbl, &msg->target, dev);
new_state = NUD_STALE;
@@ -1100,7 +1103,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* has already sent a NA to us.
*/
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
- net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+ READ_ONCE(net->ipv6.devconf_all->forwarding) &&
+ READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
/* XXX: idev->cnf.proxy_ndp */
goto out;
@@ -1148,7 +1152,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
}
/* Don't accept RS if we're not in router mode */
- if (!idev->cnf.forwarding)
+ if (!READ_ONCE(idev->cnf.forwarding))
goto out;
/*
@@ -1237,6 +1241,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts;
struct fib6_info *rt = NULL;
struct inet6_dev *in6_dev;
+ struct fib6_table *table;
u32 defrtr_usr_metric;
unsigned int pref = 0;
__u32 old_if_flags;
@@ -1317,7 +1322,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
if (old_if_flags != in6_dev->if_flags)
send_ifinfo_notify = true;
- if (!in6_dev->cnf.accept_ra_defrtr) {
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
ND_PRINTK(2, info,
"RA: %s, defrtr is false for dev: %s\n",
__func__, skb->dev->name);
@@ -1325,7 +1330,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
- if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ if (lifetime != 0 &&
+ lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
ND_PRINTK(2, info,
"RA: router lifetime (%ds) is too short: %s\n",
lifetime, skb->dev->name);
@@ -1336,7 +1342,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
* accept_ra_from_local is set to true.
*/
net = dev_net(in6_dev->dev);
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
@@ -1348,7 +1354,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
if (pref == ICMPV6_ROUTER_PREF_INVALID ||
- !in6_dev->cnf.accept_ra_rtr_pref)
+ !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
/* routes added from RAs do not use nexthop objects */
@@ -1382,7 +1388,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
neigh_release(neigh);
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
- skb->dev, pref, defrtr_usr_metric);
+ skb->dev, pref, defrtr_usr_metric,
+ lifetime);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
@@ -1409,12 +1416,21 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
- if (rt)
+ if (rt) {
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(rt, jiffies + (HZ * lifetime));
- if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ fib6_add_gc_list(rt);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
- if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
- in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
+ ra_msg->icmph.icmp6_hop_limit) {
+ WRITE_ONCE(in6_dev->cnf.hop_limit,
+ ra_msg->icmph.icmp6_hop_limit);
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
@@ -1496,7 +1512,7 @@ skip_linkparms:
}
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
ND_PRINTK(2, info,
@@ -1505,7 +1521,7 @@ skip_linkparms:
goto skip_routeinfo;
}
- if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_ri;
p;
@@ -1517,14 +1533,14 @@ skip_linkparms:
continue;
#endif
if (ri->prefix_len == 0 &&
- !in6_dev->cnf.accept_ra_defrtr)
+ !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
continue;
if (ri->lifetime != 0 &&
- ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
continue;
- if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
continue;
- if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+ if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
@@ -1544,7 +1560,7 @@ skip_routeinfo:
}
#endif
- if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_pi;
p;
@@ -1555,7 +1571,7 @@ skip_routeinfo:
}
}
- if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
+ if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
__be32 n;
u32 mtu;
@@ -1569,8 +1585,8 @@ skip_routeinfo:
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
- } else if (in6_dev->cnf.mtu6 != mtu) {
- in6_dev->cnf.mtu6 = mtu;
+ } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
+ WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
@@ -1708,7 +1724,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
if (IS_ERR(dst))
return;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK(2, warn,
@@ -1804,7 +1820,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
if (!idev)
return true;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
- idev->cnf.suppress_frag_ndisc) {
+ READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
return true;
}
@@ -1881,8 +1897,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
idev = in6_dev_get(dev);
if (!idev)
break;
- if (idev->cnf.ndisc_notify ||
- net->ipv6.devconf_all->ndisc_notify)
+ if (READ_ONCE(idev->cnf.ndisc_notify) ||
+ READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
@@ -1891,8 +1907,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
if (!idev)
evict_nocarrier = true;
else {
- evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
- net->ipv6.devconf_all->ndisc_evict_nocarrier;
+ evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
+ READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
in6_dev_put(idev);
}
@@ -1966,7 +1982,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 53d255838e..5d989d8030 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
};
int err;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0ba62f4868..f3c8e2d918 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -6,6 +6,10 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+# old sockopt interface and eval loop
+config IP6_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP6_NF_MANGLE || IP6_NF_RAW
+ depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -186,6 +190,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
+ tristate
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -195,7 +201,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP6_NF_FILTER
+ depends on IP6_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
@@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -230,6 +237,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -243,6 +251,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -254,6 +263,7 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
+ select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@@ -262,25 +272,23 @@ config IP6_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-if IP6_NF_NAT
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NETFILTER_XT_TARGET_MASQUERADE
+ depends on IP6_NF_NAT
help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
+ depends on IP6_NF_NAT || NFT_COMPAT
help
This option adds the `SNPT' and `DNPT' target, which perform
stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
To compile it as a module, choose M here. If unsure, say N.
-endif # IP6_NF_NAT
-
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b8d6dc9aee..66ce6fa5b2 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,7 +4,7 @@
#
# Link order matters here.
-obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index df785ebda0..e8992693e1 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -43,7 +43,7 @@ static int ip6table_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ip6t_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 52cf104e34..e119d4f090 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv6_table,
- ip6table_nat_table_init);
+ int ret;
+ /* net->gen->ptr[ip6table_nat_net_id] must be allocated
+ * before calling ip6t_nat_register_lookups().
+ */
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
if (ret)
- xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
return ret;
}
static void __exit ip6table_nat_exit(void)
{
- unregister_pernet_subsys(&ip6table_nat_net_ops);
xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index efbec7ee27..5e1b50c6a4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- { }
};
static int nf_ct_frag6_sysctl_register(struct net *net)
@@ -105,7 +104,7 @@ err_alloc:
static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = nf_frag->nf_frag_frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(nf_frag->nf_frag_frags_hdr);
@@ -328,9 +327,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto err;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 196dd4ecb5..dedee264b8 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen);
nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
@@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
skb_reset_transport_header(nskb);
icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr));
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index b5205311f3..806d4b5dd1 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev)
- hoplimit = idev->cnf.hop_limit;
+ hoplimit = READ_ONCE(idev->cnf.hop_limit);
else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit);
rcu_read_unlock();
}
return hoplimit;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ef2059c889..88b3fcacd4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 03dbb874c3..2eedf25560 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -160,6 +160,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
inet6_iif(skb), inet6_sdif(skb)))
continue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >=
+ READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_inc(&sk->sk_drops);
+ continue;
+ }
+
delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
@@ -288,8 +295,7 @@ out:
}
static void rawv6_err(struct sock *sk, struct sk_buff *skb,
- struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -344,7 +350,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr,
inet6_iif(skb), inet6_iif(skb)))
continue;
- rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
+ rawv6_err(sk, skb, type, code, inner_offset, info);
}
rcu_read_unlock();
}
@@ -592,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
- struct rt6_info *rt = (struct rt6_info *)*dstp;
+ struct rt6_info *rt = dst_rt6_info(*dstp);
int hlen = LL_RESERVED_SPACE(rt->dst.dev);
int tlen = rt->dst.dev->needed_tailroom;
@@ -911,7 +917,7 @@ back_from_confirm:
ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ len, 0, &ipc6, &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
@@ -935,7 +941,7 @@ do_confirm:
goto done;
}
-static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_seticmpfilter(struct sock *sk, int optname,
sockptr_t optval, int optlen)
{
switch (optname) {
@@ -952,7 +958,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
return 0;
}
-static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_geticmpfilter(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
int len;
@@ -1038,7 +1044,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_seticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
@@ -1099,7 +1105,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_geticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5ebc47da10..327caca642 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto out_oom;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
* the source of the fragment, with the Pointer field set to zero.
*/
nexthdr = hdr->nexthdr;
- if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+ if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
@@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
@@ -487,7 +485,7 @@ err_alloc:
static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ef815ba583..c9a9506b71 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev);
@@ -226,7 +227,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+ const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
dst->dev, skb, daddr);
@@ -234,8 +235,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rt6_info *rt = dst_rt6_info(dst);
struct net_device *dev = dst->dev;
- struct rt6_info *rt = (struct rt6_info *)dst;
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -354,7 +355,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct fib6_info *from;
struct inet6_dev *idev;
@@ -373,7 +374,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
if (idev && idev->dev != blackhole_netdev) {
@@ -637,6 +638,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
+ if (!idev)
+ goto out;
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
if (READ_ONCE(neigh->nud_state) & NUD_VALID)
@@ -645,14 +648,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
write_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
- neigh->updated + idev->cnf.rtr_probe_interval)) {
+ neigh->updated +
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work)
__neigh_set_probe_once(neigh);
}
write_unlock_bh(&neigh->lock);
} else if (time_after(jiffies, last_probe +
- idev->cnf.rtr_probe_interval)) {
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
@@ -931,6 +935,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
+ struct fib6_table *table;
unsigned int pref;
unsigned long lifetime;
struct fib6_info *rt;
@@ -989,10 +994,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime))
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- else
+ fib6_remove_gc_list(rt);
+ } else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_add_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt);
}
@@ -1278,7 +1291,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
- return (struct rt6_info *) dst;
+ return dst_rt6_info(dst);
dst_release(dst);
@@ -1398,6 +1411,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
struct rt6_info *prev, **p;
p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
prev = xchg(p, NULL);
if (prev) {
dst_dev_put(&prev->dst);
@@ -1587,7 +1601,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
rcu_read_lock();
idev = __in6_dev_get(dev);
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
}
@@ -2085,12 +2099,12 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
} else if (time_after(jiffies, rt->dst.expires)) {
- RT6_TRACE("purging expired route %p\n", rt);
+ pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2101,8 +2115,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (!(neigh && (neigh->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
+ pr_debug("purging route %p via non-router but gateway\n",
+ rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2211,7 +2225,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
@@ -2637,7 +2651,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
/* For dst cached in uncached_list, refcnt is already taken. */
if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
@@ -2651,7 +2665,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
@@ -2734,7 +2748,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
struct fib6_info *from;
struct rt6_info *rt;
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->sernum)
return rt6_is_valid(rt) ? dst : NULL;
@@ -2760,24 +2774,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
}
EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
- if (rt) {
- if (rt->rt6i_flags & RTF_CACHE) {
- rcu_read_lock();
- if (rt6_check_expired(rt)) {
- rt6_remove_exception_rt(rt);
- dst = NULL;
- }
- rcu_read_unlock();
- } else {
- dst_release(dst);
- dst = NULL;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
+ if (rt6_check_expired(rt)) {
+ /* counteract the dst_release() in sk_dst_reset() */
+ dst_hold(dst);
+ sk_dst_reset(sk);
+
+ rt6_remove_exception_rt(rt);
}
+ rcu_read_unlock();
+ return;
}
- return dst;
+ sk_dst_reset(sk);
}
static void ip6_link_failure(struct sk_buff *skb)
@@ -2786,7 +2800,7 @@ static void ip6_link_failure(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2842,7 +2856,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
* IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
@@ -3240,8 +3254,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
- if (idev && idev->cnf.mtu6 > mtu)
- mtu = idev->cnf.mtu6;
+ if (idev)
+ mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
@@ -3591,7 +3605,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
if (!dev)
goto out;
- if (idev->cnf.disable_ipv6) {
+ if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
@@ -3765,8 +3779,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
- else
- fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4142,7 +4154,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev)
return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ if (READ_ONCE(in6_dev->cnf.forwarding) ||
+ !READ_ONCE(in6_dev->cnf.accept_redirects))
return;
/* RFC2461 8.1:
@@ -4165,7 +4178,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
@@ -4355,7 +4368,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref,
- u32 defrtr_usr_metric)
+ u32 defrtr_usr_metric,
+ int lifetime)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
@@ -4368,6 +4382,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
+ .fc_expires = jiffies_to_clock_t(lifetime * HZ),
};
cfg.fc_gateway = *gwaddr;
@@ -4434,7 +4449,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4464,6 +4479,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
+ /* Only do the default setting of fc_metric in route adding */
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
@@ -4574,8 +4592,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
f6i->dst_nocount = true;
if (!anycast &&
- (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy))
+ (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
+ READ_ONCE(idev->cnf.disable_policy)))
f6i->dst_nopolicy = true;
}
@@ -5597,7 +5615,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
unsigned char nh_flags = 0;
@@ -5671,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
} else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
@@ -6100,7 +6118,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
@@ -6327,12 +6345,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
if (!write)
return -EINVAL;
- net = (struct net *)ctl->extra1;
- delay = net->ipv6.sysctl.flush_delay;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (ret)
return ret;
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
@@ -6417,7 +6435,6 @@ static struct ctl_table ipv6_route_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
@@ -6441,10 +6458,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[1].procname = NULL;
}
return table;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index a013b92cbb..2c83b75864 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -212,9 +212,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -234,9 +234,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
@@ -268,23 +268,21 @@ static int rpl_input(struct sk_buff *skb)
return err;
}
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
- preempt_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
}
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
+ local_bh_enable();
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 35508abd76..a31521e270 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -551,6 +551,8 @@ out_unregister_iptun:
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_genl:
+#endif
+#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC)
genl_unregister_family(&seg6_genl_family);
#endif
out_unregister_pernet:
@@ -564,8 +566,9 @@ void seg6_exit(void)
seg6_hmac_exit();
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
#endif
- unregister_pernet_subsys(&ip6_segments_ops);
genl_unregister_family(&seg6_genl_family);
+ unregister_pernet_subsys(&ip6_segments_ops);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index d43c50a731..bbf5b84a70 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
struct sr6_tlv_hmac *tlv;
struct ipv6_sr_hdr *srh;
struct inet6_dev *idev;
+ int require_hmac;
idev = __in6_dev_get(skb->dev);
@@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
tlv = seg6_get_tlv_hmac(srh);
+ require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac);
/* mandatory check but no tlv */
- if (idev->cnf.seg6_require_hmac > 0 && !tlv)
+ if (require_hmac > 0 && !tlv)
return false;
/* no check */
- if (idev->cnf.seg6_require_hmac < 0)
+ if (require_hmac < 0)
return true;
/* check only if present */
- if (idev->cnf.seg6_require_hmac == 0 && !tlv)
+ if (require_hmac == 0 && !tlv)
return true;
/* now, seg6_require_hmac >= 0 && tlv */
@@ -354,6 +356,7 @@ static int seg6_hmac_init_algo(void)
struct crypto_shash *tfm;
struct shash_desc *shash;
int i, alg_count, cpu;
+ int ret = -ENOMEM;
alg_count = ARRAY_SIZE(hmac_algos);
@@ -364,12 +367,14 @@ static int seg6_hmac_init_algo(void)
algo = &hmac_algos[i];
algo->tfms = alloc_percpu(struct crypto_shash *);
if (!algo->tfms)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
tfm = crypto_alloc_shash(algo->name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto error_out;
+ }
p_tfm = per_cpu_ptr(algo->tfms, cpu);
*p_tfm = tfm;
}
@@ -381,18 +386,22 @@ static int seg6_hmac_init_algo(void)
algo->shashs = alloc_percpu(struct shash_desc *);
if (!algo->shashs)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
shash = kzalloc_node(shsize, GFP_KERNEL,
cpu_to_node(cpu));
if (!shash)
- return -ENOMEM;
+ goto error_out;
*per_cpu_ptr(algo->shashs, cpu) = shash;
}
}
return 0;
+
+error_out:
+ seg6_hmac_exit();
+ return ret;
}
int __init seg6_hmac_init(void)
@@ -410,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net)
void seg6_hmac_exit(void)
{
struct seg6_hmac_algo *algo = NULL;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
int i, alg_count, cpu;
alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
- for_each_possible_cpu(cpu) {
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- shash = *per_cpu_ptr(algo->shashs, cpu);
- kfree(shash);
- tfm = *per_cpu_ptr(algo->tfms, cpu);
- crypto_free_shash(tfm);
+ if (algo->shashs) {
+ for_each_possible_cpu(cpu) {
+ shash = *per_cpu_ptr(algo->shashs, cpu);
+ kfree(shash);
+ }
+ free_percpu(algo->shashs);
+ }
+
+ if (algo->tfms) {
+ for_each_possible_cpu(cpu) {
+ tfm = *per_cpu_ptr(algo->tfms, cpu);
+ crypto_free_shash(tfm);
+ }
+ free_percpu(algo->tfms);
}
- free_percpu(algo->tfms);
- free_percpu(algo->shashs);
}
}
EXPORT_SYMBOL(seg6_hmac_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 03b877ff45..098632adc9 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -459,34 +459,30 @@ static int seg6_input_core(struct net *net, struct sock *sk,
int err;
err = seg6_do_srh(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
- preempt_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
}
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
+ local_bh_enable();
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -494,6 +490,9 @@ static int seg6_input_core(struct net *net, struct sock *sk,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
+drop:
+ kfree_skb(skb);
+ return err;
}
static int seg6_input_nf(struct sk_buff *skb)
@@ -535,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -557,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
goto drop;
}
- preempt_disable();
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ local_bh_enable();
}
skb_dst_drop(skb);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 24e2b4b494..c434940131 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx6_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx6_finish);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx4_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fbad6e1c97..83b195f095 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -132,8 +132,8 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
return NULL;
}
-static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
- struct ip_tunnel_parm *parms)
+static struct ip_tunnel __rcu **
+__ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -207,7 +207,7 @@ static int ipip6_tunnel_create(struct net_device *dev)
__dev_addr_set(dev, &t->parms.iph.saddr, 4);
memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
- if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags))
dev->priv_flags |= IFF_ISATAP;
dev->rtnl_link_ops = &sit_link_ops;
@@ -226,7 +226,8 @@ out:
}
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms, int create)
+ struct ip_tunnel_parm_kern *parms,
+ int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -1135,7 +1136,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
}
-static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+static void ipip6_tunnel_update(struct ip_tunnel *t,
+ struct ip_tunnel_parm_kern *p,
__u32 fwmark)
{
struct net *net = t->net;
@@ -1196,11 +1198,11 @@ static int
ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
struct ip_tunnel_6rd ip6rd;
- struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1251,7 +1253,7 @@ static bool ipip6_valid_ip_proto(u8 ipproto)
}
static int
-__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
+__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p)
{
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1268,7 +1270,7 @@ __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1281,7 +1283,7 @@ ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1297,7 +1299,7 @@ ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1328,7 +1330,7 @@ ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1348,7 +1350,8 @@ ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
@@ -1398,7 +1401,6 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
@@ -1408,7 +1410,6 @@ static void ipip6_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
}
#define SIT_FEATURES (NETIF_F_SG | \
@@ -1437,6 +1438,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->features |= NETIF_F_LLTX;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1449,16 +1452,11 @@ static int ipip6_tunnel_init(struct net_device *dev)
strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
+ if (err)
return err;
- }
+
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
return 0;
@@ -1495,7 +1493,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip6_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -1604,8 +1602,8 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1692,7 +1690,7 @@ static size_t ipip6_get_size(const struct net_device *dev)
static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
@@ -1702,7 +1700,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
- nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS,
+ ip_tunnel_flags_to_be16(parm->i_flags)) ||
nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
@@ -1876,22 +1875,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c8d2ca2722..bfad1e89b6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -177,24 +177,33 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct sock *ret = sk;
__u8 rcv_wscale;
int full_space;
+ SKB_DR(reason);
if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
- if (!req)
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
+ if (!req) {
+ SKB_DR_SET(reason, NO_SOCKET);
goto out_drop;
+ }
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (security_inet_conn_request(sk, skb, req))
+ if (security_inet_conn_request(sk, skb, req)) {
+ SKB_DR_SET(reason, SECURITY_HOOK);
goto out_free;
+ }
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -231,11 +240,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
- if (IS_ERR(dst))
+ if (IS_ERR(dst)) {
+ SKB_DR_SET(reason, IP_OUTNOROUTES);
goto out_free;
+ }
}
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
@@ -247,14 +258,23 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ /* req->syncookie is set true only if ACK is validated
+ * by BPF kfunc, then, rcv_wscale is already configured.
+ */
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, dst);
ret = tcp_get_cookie_sock(sk, skb, req, dst);
+ if (!ret) {
+ SKB_DR_SET(reason, NO_SOCKET);
+ goto out_drop;
+ }
out:
return ret;
out_free:
reqsk_free(req);
out_drop:
+ kfree_skb_reason(skb, reason);
return NULL;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 888676163e..c060285ff4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -213,7 +213,6 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_doulongvec_minmax,
.extra2 = &ioam6_id_wide_max,
},
- { }
};
static struct ctl_table ipv6_rotable[] = {
@@ -248,11 +247,11 @@ static struct ctl_table ipv6_rotable[] = {
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
- { }
};
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv6_table_template);
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
@@ -264,7 +263,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_table)
goto out;
/* Update the variables to point into the current struct net */
- for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++)
+ for (i = 0; i < table_size; i++)
ipv6_table[i].data += (void *)net - (void *)&init_net;
ipv6_route_table = ipv6_route_sysctl_init(net);
@@ -276,8 +275,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
goto out_ipv6_route_table;
net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6",
- ipv6_table,
- ARRAY_SIZE(ipv6_table_template));
+ ipv6_table, table_size);
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
@@ -313,9 +311,9 @@ out_ipv6_table:
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
- struct ctl_table *ipv6_table;
- struct ctl_table *ipv6_route_table;
- struct ctl_table *ipv6_icmp_table;
+ const struct ctl_table *ipv6_table;
+ const struct ctl_table *ipv6_route_table;
+ const struct ctl_table *ipv6_icmp_table;
ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57b25b1fc9..3385faf1d5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -58,7 +58,9 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -68,7 +70,8 @@
#include <trace/events/tcp.h>
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -94,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
rcu_assign_pointer(sk->sk_rx_dst, dst);
sk->sk_rx_dst_ifindex = skb->skb_iif;
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
}
@@ -489,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
- sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
case TCP_LISTEN:
break;
@@ -792,7 +789,8 @@ clear_hash_nostart:
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u32 tw_isn)
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
@@ -806,7 +804,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
+ if (!tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
@@ -819,9 +817,10 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
- tcp_v6_init_req(req, sk, skb);
+ tcp_v6_init_req(req, sk, skb, tw_isn);
if (security_inet_conn_request(sk, skb, req))
return NULL;
@@ -1005,7 +1004,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -1112,7 +1112,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- trace_tcp_send_reset(sk, skb);
if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
priority = READ_ONCE(sk->sk_priority);
@@ -1128,6 +1127,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
+ trace_tcp_send_reset(sk, skb, reason);
+
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
ipv6_get_dsfield(ipv6h), label, priority, txhash,
&key);
@@ -1267,15 +1268,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
@@ -1439,7 +1435,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
@@ -1450,6 +1445,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ ip6_dst_store(newsk, dst, NULL, NULL);
+
newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
@@ -1623,7 +1620,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all)
opt_skb = skb_clone_and_charge_r(skb, sk);
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@@ -1653,12 +1649,12 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v6_cookie_check(sk, skb);
- if (!nsk)
- goto discard;
-
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb))
- goto reset;
+ if (nsk) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason)
+ goto reset;
+ }
if (opt_skb)
__kfree_skb(opt_skb);
return 0;
@@ -1666,14 +1662,15 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb))
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason)
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
return 0;
reset:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
if (opt_skb)
__kfree_skb(opt_skb);
@@ -1737,7 +1734,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -1754,6 +1750,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
+ u32 isn;
struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -1790,7 +1787,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -1856,15 +1852,21 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v6_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v6_send_reset(nsk, skb, rst_reason);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
}
+process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
@@ -1933,7 +1935,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(NULL, skb);
+ tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -1961,7 +1963,7 @@ do_time_wait:
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1979,6 +1981,7 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -1988,7 +1991,7 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
@@ -2038,7 +2041,6 @@ void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
@@ -2365,11 +2367,6 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-static const struct inet6_protocol tcpv6_protocol = {
- .handler = tcp_v6_rcv,
- .err_handler = tcp_v6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
@@ -2391,22 +2388,21 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
- tcp_twsk_purge(net_exit_list, AF_INET6);
-}
-
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
- .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
if (ret)
goto out;
@@ -2431,7 +2427,7 @@ out_tcpv6_pernet_subsys:
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
goto out;
}
@@ -2439,5 +2435,5 @@ void tcpv6_exit(void)
{
unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index bf0c957e4b..23971903e6 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,31 +7,84 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ hdr = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
- return tcp_gro_receive(head, skb);
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -40,6 +93,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return 0;
}
+static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+
+ if (*oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+}
+
+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct ipv6hdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct ipv6hdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ iph2->saddr = iph->saddr;
+ iph2->daddr = iph->daddr;
+ __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv6_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -51,6 +159,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp6_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -66,15 +177,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
return tcp_gso_segment(skb, features);
}
-static const struct net_offload tcpv6_offload = {
- .callbacks = {
- .gso_segment = tcp6_gso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
- },
-};
int __init tcpv6_offload_init(void)
{
- return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+ net_hotdata.tcpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp6_gso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8c14c4cc82..c81a07ac04 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/indirect_call_wrapper.h>
+#include <trace/events/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -79,9 +80,6 @@ u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *faddr,
const __be16 fport)
{
- static u32 udp6_ehash_secret __read_mostly;
- static u32 udp_ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&udp6_ehash_secret,
@@ -171,15 +169,21 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
+ bool need_rescore;
result = NULL;
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ need_rescore = false;
+rescore:
+ score = compute_score(need_rescore ? result : sk, net, saddr,
+ sport, daddr, hnum, dif, sdif);
if (score > badness) {
badness = score;
+ if (need_rescore)
+ continue;
+
if (sk->sk_state == TCP_ESTABLISHED) {
result = sk;
continue;
@@ -200,8 +204,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
if (IS_ERR(result))
continue;
- badness = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ /* compute_score is too long of a function to be
+ * inlined, and calling it again here yields
+ * measureable overhead for some
+ * workloads. Work around it by jumping
+ * backwards to rescore 'result'.
+ */
+ need_rescore = true;
+ goto rescore;
}
}
return result;
@@ -275,7 +285,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
@@ -661,8 +672,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -900,11 +911,8 @@ start_lookup:
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
- if (udp_sk_rx_dst_set(sk, dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
- }
+ if (udp_sk_rx_dst_set(sk, dst))
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
@@ -1101,11 +1109,12 @@ void udp_v6_early_demux(struct sk_buff *skb)
else
return;
- if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+ if (!sk)
return;
skb->sk = sk;
- skb->destructor = sock_efree;
+ DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+ skb->destructor = sock_pfree;
dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
@@ -1574,7 +1583,7 @@ back_from_confirm:
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- (struct rt6_info *)dst,
+ dst_rt6_info(dst),
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
@@ -1601,7 +1610,7 @@ do_append_data:
ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, dst_rt6_info(dst),
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
@@ -1703,11 +1712,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-static const struct inet6_protocol udpv6_protocol = {
- .handler = udpv6_rcv,
- .err_handler = udpv6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
@@ -1804,7 +1808,12 @@ int __init udpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+ net_hotdata.udpv6_protocol = (struct inet6_protocol) {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
if (ret)
goto out;
@@ -1815,12 +1824,12 @@ out:
return ret;
out_udpv6_protocol:
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
goto out;
}
void udpv6_exit(void)
{
inet6_unregister_protosw(&udpv6_protosw);
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 626d7b362d..b41152dd42 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -164,7 +164,8 @@ flush:
INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
@@ -186,20 +187,19 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
-static const struct net_offload udpv6_offload = {
- .callbacks = {
- .gso_segment = udp6_ufo_fragment,
- .gro_receive = udp6_gro_receive,
- .gro_complete = udp6_gro_complete,
- },
-};
-
-int udpv6_offload_init(void)
+int __init udpv6_offload_init(void)
{
- return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+ net_hotdata.udpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
int udpv6_offload_exit(void)
{
- return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+ return inet6_del_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 6e36e5047f..4abc5e9d63 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- int nhlen = skb->data - skb_network_header(skb);
+ int nhlen = -skb_network_offset(skb);
skb_network_header(skb)[IP6CB(skb)->nhoff] =
XFRM_MODE_SKB_CB(skb)->protocol;
@@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -109,19 +113,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
@@ -279,6 +270,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
if (!x)
continue;
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
spin_lock(&x->lock);
if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 42fb6996b0..2f1ea5f999 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif,
{
struct dst_entry *dst;
struct net_device *dev;
+ struct inet6_dev *idev;
dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
- dev = ip6_dst_idev(dst)->dev;
+ idev = ip6_dst_idev(dst);
+ if (!idev) {
+ dst_release(dst);
+ return -EHOSTUNREACH;
+ }
+ dev = idev->dev;
ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
dst_release(dst);
return 0;
@@ -70,7 +76,7 @@ static int xfrm6_get_saddr(struct net *net, int oif,
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rt6_info *rt = (struct rt6_info *)xdst->route;
+ struct rt6_info *rt = dst_rt6_info(xdst->route);
xdst->u.dst.dev = dev;
netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
@@ -184,7 +190,6 @@ static struct ctl_table xfrm6_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int __net_init xfrm6_net_sysctl_init(struct net *net)
@@ -218,7 +223,7 @@ err_alloc:
static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!net->ipv6.sysctl.xfrm6_hdr)
return;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f6cb94f82c..bf140ef781 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -355,10 +355,7 @@ static int __init xfrm6_tunnel_init(void)
{
int rv;
- xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
- sizeof(struct xfrm6_tunnel_spi),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ xfrm6_tunnel_spi_kmem = KMEM_CACHE(xfrm6_tunnel_spi, SLAB_HWCACHE_ALIGN);
if (!xfrm6_tunnel_spi_kmem)
return -ENOMEM;
rv = register_pernet_subsys(&xfrm6_tunnel_net_ops);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 498a0c35b7..c00323fa9e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -184,7 +184,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
@@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
struct iucv_sock *iucv = iucv_sk(sk);
struct iucv_path *path = iucv->path;
- if (iucv->path) {
- iucv->path = NULL;
+ /* Whoever resets the path pointer, must sever and free it. */
+ if (xchg(&iucv->path, NULL)) {
if (with_user_data) {
low_nmcpy(user_data, iucv->src_name);
high_nmcpy(user_data, iucv->dst_name);
@@ -795,7 +795,7 @@ done:
/* Accept a pending connection */
static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *nsk;
@@ -809,7 +809,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
@@ -1060,13 +1060,12 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
int i;
/* skip iucv_array lying in the headroom */
- iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].address = virt_to_dma32(skb->data);
iba[0].length = (u32)skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- iba[i + 1].address =
- (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].address = virt_to_dma32(skb_frag_address(frag));
iba[i + 1].length = (u32)skb_frag_size(frag);
}
err = pr_iucv->message_send(iucv->path, &txmsg,
@@ -1162,13 +1161,12 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
struct iucv_array *iba = (struct iucv_array *)skb->head;
int i;
- iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].address = virt_to_dma32(skb->data);
iba[0].length = (u32)skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- iba[i + 1].address =
- (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].address = virt_to_dma32(skb_frag_address(frag));
iba[i + 1].length = (u32)skb_frag_size(frag);
}
rc = pr_iucv->message_receive(path, msg,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index b0b3e9c5af..b7bf34a5eb 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -73,8 +73,42 @@ const struct bus_type iucv_bus = {
};
EXPORT_SYMBOL(iucv_bus);
-struct device *iucv_root;
-EXPORT_SYMBOL(iucv_root);
+static struct device *iucv_root;
+
+static void iucv_release_device(struct device *device)
+{
+ kfree(device);
+}
+
+struct device *iucv_alloc_device(const struct attribute_group **attrs,
+ struct device_driver *driver,
+ void *priv, const char *fmt, ...)
+{
+ struct device *dev;
+ va_list vargs;
+ int rc;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto out_error;
+ va_start(vargs, fmt);
+ rc = dev_set_name(dev, fmt, vargs);
+ va_end(vargs);
+ if (rc)
+ goto out_error;
+ dev->bus = &iucv_bus;
+ dev->parent = iucv_root;
+ dev->driver = driver;
+ dev->groups = attrs;
+ dev->release = iucv_release_device;
+ dev_set_drvdata(dev, priv);
+ return dev;
+
+out_error:
+ kfree(dev);
+ return NULL;
+}
+EXPORT_SYMBOL(iucv_alloc_device);
static int iucv_available;
@@ -210,7 +244,7 @@ struct iucv_cmd_dpl {
u8 iprmmsg[8];
u32 ipsrccls;
u32 ipmsgtag;
- u32 ipbfadr2;
+ dma32_t ipbfadr2;
u32 ipbfln2f;
u32 res;
} __attribute__ ((packed,aligned(8)));
@@ -226,11 +260,11 @@ struct iucv_cmd_db {
u8 iprcode;
u32 ipmsgid;
u32 iptrgcls;
- u32 ipbfadr1;
+ dma32_t ipbfadr1;
u32 ipbfln1f;
u32 ipsrccls;
u32 ipmsgtag;
- u32 ipbfadr2;
+ dma32_t ipbfadr2;
u32 ipbfln2f;
u32 res;
} __attribute__ ((packed,aligned(8)));
@@ -286,6 +320,7 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
*/
static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
{
+ unsigned long reg1 = virt_to_phys(parm);
int cc;
asm volatile(
@@ -296,7 +331,7 @@ static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
" srl %[cc],28\n"
: [cc] "=&d" (cc), "+m" (*parm)
: [reg0] "d" ((unsigned long)command),
- [reg1] "d" ((unsigned long)parm)
+ [reg1] "d" (reg1)
: "cc", "0", "1");
return cc;
}
@@ -431,7 +466,7 @@ static void iucv_declare_cpu(void *data)
/* Declare interrupt buffer. */
parm = iucv_param_irq[cpu];
memset(parm, 0, sizeof(union iucv_param));
- parm->db.ipbfadr1 = virt_to_phys(iucv_irq_data[cpu]);
+ parm->db.ipbfadr1 = virt_to_dma32(iucv_irq_data[cpu]);
rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm);
if (rc) {
char *err = "Unknown";
@@ -519,7 +554,7 @@ static void iucv_setmask_mp(void)
*/
static void iucv_setmask_up(void)
{
- cpumask_t cpumask;
+ static cpumask_t cpumask;
int cpu;
/* Disable all cpu but the first in cpu_irq_cpumask. */
@@ -627,23 +662,33 @@ static int iucv_cpu_online(unsigned int cpu)
static int iucv_cpu_down_prep(unsigned int cpu)
{
- cpumask_t cpumask;
+ cpumask_var_t cpumask;
+ int ret = 0;
if (!iucv_path_table)
return 0;
- cpumask_copy(&cpumask, &iucv_buffer_cpumask);
- cpumask_clear_cpu(cpu, &cpumask);
- if (cpumask_empty(&cpumask))
+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(cpumask, &iucv_buffer_cpumask);
+ cpumask_clear_cpu(cpu, cpumask);
+ if (cpumask_empty(cpumask)) {
/* Can't offline last IUCV enabled cpu. */
- return -EINVAL;
+ ret = -EINVAL;
+ goto __free_cpumask;
+ }
iucv_retrieve_cpu(NULL);
if (!cpumask_empty(&iucv_irq_cpumask))
- return 0;
+ goto __free_cpumask;
+
smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
iucv_allow_cpu, NULL, 1);
- return 0;
+
+__free_cpumask:
+ free_cpumask_var(cpumask);
+ return ret;
}
/**
@@ -1080,8 +1125,7 @@ static int iucv_message_receive_iprmdata(struct iucv_path *path,
size = (size < 8) ? size : 8;
for (array = buffer; size > 0; array++) {
copy = min_t(size_t, size, array->length);
- memcpy((u8 *)(addr_t) array->address,
- rmmsg, copy);
+ memcpy(dma32_to_virt(array->address), rmmsg, copy);
rmmsg += copy;
size -= copy;
}
@@ -1123,7 +1167,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
parm = iucv_param[smp_processor_id()];
memset(parm, 0, sizeof(union iucv_param));
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = virt_to_dma32(buffer);
parm->db.ipbfln1f = (u32) size;
parm->db.ipmsgid = msg->id;
parm->db.ippathid = path->pathid;
@@ -1241,7 +1285,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.iptrgcls = msg->class;
memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8));
} else {
- parm->db.ipbfadr1 = (u32)(addr_t) reply;
+ parm->db.ipbfadr1 = virt_to_dma32(reply);
parm->db.ipbfln1f = (u32) size;
parm->db.ippathid = path->pathid;
parm->db.ipflags1 = flags;
@@ -1293,7 +1337,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.ipmsgtag = msg->tag;
memcpy(parm->dpl.iprmmsg, buffer, 8);
} else {
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = virt_to_dma32(buffer);
parm->db.ipbfln1f = (u32) size;
parm->db.ippathid = path->pathid;
parm->db.ipflags1 = flags | IUCV_IPNORPY;
@@ -1378,7 +1422,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
parm->dpl.iptrgcls = msg->class;
parm->dpl.ipsrccls = srccls;
parm->dpl.ipmsgtag = msg->tag;
- parm->dpl.ipbfadr2 = (u32)(addr_t) answer;
+ parm->dpl.ipbfadr2 = virt_to_dma32(answer);
parm->dpl.ipbfln2f = (u32) asize;
memcpy(parm->dpl.iprmmsg, buffer, 8);
} else {
@@ -1387,9 +1431,9 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
parm->db.iptrgcls = msg->class;
parm->db.ipsrccls = srccls;
parm->db.ipmsgtag = msg->tag;
- parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+ parm->db.ipbfadr1 = virt_to_dma32(buffer);
parm->db.ipbfln1f = (u32) size;
- parm->db.ipbfadr2 = (u32)(addr_t) answer;
+ parm->db.ipbfadr2 = virt_to_dma32(answer);
parm->db.ipbfln2f = (u32) asize;
}
rc = iucv_call_b2f0(IUCV_SEND, parm);
@@ -1903,6 +1947,6 @@ static void __exit iucv_exit(void)
subsys_initcall(iucv_init);
module_exit(iucv_exit);
-MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
+MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert <felfert@millenux.com>");
MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver");
MODULE_LICENSE("GPL");
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index eda933c097..2f191e50d4 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -627,7 +627,8 @@ retry:
skb = txm->frag_skb;
}
- if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
+ if (WARN_ON(!skb_shinfo(skb)->nr_frags) ||
+ WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
ret = -EINVAL;
goto out;
}
@@ -637,8 +638,8 @@ retry:
msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
- skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
- msize);
+ (const struct bio_vec *)skb_shinfo(skb)->frags,
+ skb_shinfo(skb)->nr_frags, msize);
iov_iter_advance(&msg.msg_iter, txm->frag_offset);
do {
@@ -1878,15 +1879,11 @@ static int __init kcm_init(void)
{
int err = -ENOMEM;
- kcm_muxp = kmem_cache_create("kcm_mux_cache",
- sizeof(struct kcm_mux), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ kcm_muxp = KMEM_CACHE(kcm_mux, SLAB_HWCACHE_ALIGN);
if (!kcm_muxp)
goto fail;
- kcm_psockp = kmem_cache_create("kcm_psock_cache",
- sizeof(struct kcm_psock), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ kcm_psockp = KMEM_CACHE(kcm_psock, SLAB_HWCACHE_ALIGN);
if (!kcm_psockp)
goto fail;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8d21ff25f1..7ea4adf81d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -88,6 +88,11 @@
/* Default trace flags */
#define L2TP_DEFAULT_DEBUG_FLAGS 0
+#define L2TP_DEPTH_NESTING 2
+#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
+#error "L2TP requires its own lockdep subclass"
+#endif
+
/* Private data stored for received packets in the skb.
*/
struct l2tp_skb_cb {
@@ -794,6 +799,7 @@ static void l2tp_session_queue_purge(struct l2tp_session *session)
static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
{
struct l2tp_session *session = NULL;
+ struct l2tp_tunnel *orig_tunnel = tunnel;
unsigned char *ptr, *optr;
u16 hdrflags;
u32 tunnel_id, session_id;
@@ -819,13 +825,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Get L2TP header flags */
hdrflags = ntohs(*(__be16 *)ptr);
- /* Check protocol version */
+ /* Get protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
- if (version != tunnel->version) {
- pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto invalid;
- }
/* Get length of L2TP packet */
length = skb->len;
@@ -837,7 +838,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Skip flags */
ptr += 2;
- if (tunnel->version == L2TP_HDR_VER_2) {
+ if (version == L2TP_HDR_VER_2) {
/* If length is present, skip it */
if (hdrflags & L2TP_HDRFLAG_L)
ptr += 2;
@@ -845,6 +846,20 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Extract tunnel and session ID */
tunnel_id = ntohs(*(__be16 *)ptr);
ptr += 2;
+
+ if (tunnel_id != tunnel->tunnel_id) {
+ /* We are receiving traffic for another tunnel, probably
+ * because we have several tunnels between the same
+ * IP/port quadruple, look it up.
+ */
+ struct l2tp_tunnel *alt_tunnel;
+
+ alt_tunnel = l2tp_tunnel_get(tunnel->l2tp_net, tunnel_id);
+ if (!alt_tunnel)
+ goto pass;
+ tunnel = alt_tunnel;
+ }
+
session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
} else {
@@ -854,6 +869,13 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 4;
}
+ /* Check protocol version */
+ if (version != tunnel->version) {
+ pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+ tunnel->name, version, tunnel->version);
+ goto invalid;
+ }
+
/* Find the session context */
session = l2tp_tunnel_get_session(tunnel, session_id);
if (!session || !session->recv_skb) {
@@ -875,6 +897,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
+ if (tunnel != orig_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
+
return 0;
invalid:
@@ -884,25 +909,26 @@ pass:
/* Put UDP header back */
__skb_push(skb, sizeof(struct udphdr));
+ if (tunnel != orig_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
+
return 1;
}
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
+/* UDP encapsulation receive and error receive handlers.
+ * See net/ipv4/udp.c for details.
+ *
+ * Note that these functions are called from inside an
+ * RCU-protected region, but without the socket being locked.
+ *
+ * Hence we use rcu_dereference_sk_user_data to access the
+ * tunnel data structure rather than the usual l2tp_sk_to_tunnel
+ * accessor function.
*/
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- /* Note that this is called from the encap_rcv hook inside an
- * RCU-protected region, but without the socket being locked.
- * Hence we use rcu_dereference_sk_user_data to access the
- * tunnel data structure rather the usual l2tp_sk_to_tunnel
- * accessor function.
- */
tunnel = rcu_dereference_sk_user_data(sk);
if (!tunnel)
goto pass_up;
@@ -919,6 +945,29 @@ pass_up:
}
EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
+static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload)
+{
+ struct l2tp_tunnel *tunnel;
+
+ tunnel = rcu_dereference_sk_user_data(sk);
+ if (!tunnel || tunnel->fd < 0)
+ return;
+
+ sk->sk_err = err;
+ sk_error_report(sk);
+
+ if (ip_hdr(skb)->version == IPVERSION) {
+ if (inet_test_bit(RECVERR, sk))
+ return ip_icmp_error(sk, skb, err, port, info, payload);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ if (inet6_test_bit(RECVERR6, sk))
+ return ipv6_icmp_error(sk, skb, err, port, info, payload);
+#endif
+ }
+}
+
/************************************************************************
* Transmit handling
***********************************************************************/
@@ -1041,7 +1090,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
- bh_lock_sock_nested(sk);
+ /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by
+ * nested socket calls on the same lockdep socket class. This can
+ * happen when data from a user socket is routed over l2tp, which uses
+ * another userspace socket.
+ */
+ spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING);
+
if (sock_owned_by_user(sk)) {
kfree_skb(skb);
ret = NET_XMIT_DROP;
@@ -1093,7 +1148,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl);
out_unlock:
- bh_unlock_sock(sk);
+ spin_unlock(&sk->sk_lock.slock);
return ret;
}
@@ -1493,6 +1548,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
.sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
+ .encap_err_rcv = l2tp_udp_encap_err_recv,
.encap_destroy = l2tp_udp_encap_destroy,
};
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 25ca89f804..8ba00ad433 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -100,7 +100,7 @@ static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
};
-static struct device_type l2tpeth_type = {
+static const struct device_type l2tpeth_type = {
.name = "l2tpeth",
};
@@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
/* checksums verified by L2TP */
skb->ip_summed = CHECKSUM_NONE;
+ /* drop outer flow-hash */
+ skb_clear_hash(skb);
+
skb_dst_drop(skb);
nf_reset_ct(skb);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 9a2a9ed3ba..19c8cc5289 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
rcu_read_lock();
if (!rt) {
@@ -478,7 +478,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = ip_route_output_ports(sock_net(sk), fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7bf14cf9ff..8780ec64f3 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -630,7 +630,7 @@ back_from_confirm:
ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
- &fl6, (struct rt6_info *)dst,
+ &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index fde1140d89..4eb52add71 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
* llc_ui_accept - accept a new incoming connection.
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
- * @flags: User specified operational flags.
- * @kern: If the socket is kernel internal
+ * @arg: User specified arguments
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 8443a6d841..72e101135f 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -44,11 +44,6 @@ static struct ctl_table llc2_timeout_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
-};
-
-static struct ctl_table llc_station_table[] = {
- { },
};
static struct ctl_table_header *llc2_timeout_header;
@@ -56,8 +51,9 @@ static struct ctl_table_header *llc_station_header;
int __init llc_sysctl_init(void)
{
+ struct ctl_table empty[1] = {};
llc2_timeout_header = register_net_sysctl(&init_net, "net/llc/llc2/timeout", llc2_timeout_table);
- llc_station_header = register_net_sysctl(&init_net, "net/llc/station", llc_station_table);
+ llc_station_header = register_net_sysctl_sz(&init_net, "net/llc/station", empty, 0);
if (!llc2_timeout_header || !llc_station_header) {
llc_sysctl_exit();
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4406b4f8f3..a33884967f 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -29,7 +29,7 @@ mac80211-y := \
spectmgmt.o \
tx.o \
key.o \
- util.o \
+ util.o parse.o \
wme.o \
chan.o \
trace.o mlme.o \
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b8a278355e..677bbbac9f 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -616,7 +616,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
if (!pubsta->deflink.ht_cap.ht_supported &&
- sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
+ !pubsta->deflink.vht_cap.vht_supported &&
+ !pubsta->deflink.he_cap.has_he &&
+ !pubsta->deflink.eht_cap.has_eht)
return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action))
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1d43a80064..87a7b569cc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
/* apply all changes now - no failures allowed */
- if (monitor_sdata)
+ if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
ieee80211_set_mu_mimo_follow(monitor_sdata, params);
if (params->flags) {
@@ -886,33 +886,32 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_sub_if_data *sdata;
- int ret = 0;
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
+ int ret;
lockdep_assert_wiphy(local->hw.wiphy);
- if (cfg80211_chandef_identical(&local->monitor_chandef, chandef))
+ if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
+ &chanreq.oper))
return 0;
- if (local->use_chanctx) {
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (sdata) {
- ieee80211_link_release_channel(&sdata->deflink);
- ret = ieee80211_link_use_channel(&sdata->deflink,
- chandef,
- IEEE80211_CHANCTX_EXCLUSIVE);
- }
- } else {
- if (local->open_count == local->monitors) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
- }
- }
+ sdata = wiphy_dereference(local->hw.wiphy,
+ local->monitor_sdata);
+ if (!sdata)
+ goto done;
- if (ret == 0)
- local->monitor_chandef = *chandef;
+ if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
+ &chanreq.oper))
+ return 0;
- return ret;
+ ieee80211_link_release_channel(&sdata->deflink);
+ ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
+ IEEE80211_CHANCTX_EXCLUSIVE);
+ if (ret)
+ return ret;
+done:
+ local->monitor_chanreq = chanreq;
+ return 0;
}
static int
@@ -953,7 +952,8 @@ ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
struct cfg80211_fils_discovery *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct fils_discovery_data *new, *old = NULL;
struct ieee80211_fils_discovery *fd;
@@ -980,7 +980,8 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
}
- return BSS_CHANGED_FILS_DISCOVERY;
+ *changed |= BSS_CHANGED_FILS_DISCOVERY;
+ return 0;
}
static int
@@ -1240,6 +1241,30 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_link_data *link;
+ u8 link_id, num = 0;
+
+ if (sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_P2P_GO)
+ return num;
+
+ if (!sdata->vif.valid_links)
+ return num;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
+ if (sdata_dereference(link->u.ap.beacon, sdata))
+ num++;
+ }
+
+ return num;
+}
+
static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *params)
{
@@ -1258,6 +1283,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
unsigned int link_id = params->beacon.link_id;
struct ieee80211_link_data *link;
struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_chan_req chanreq = { .oper = params->chandef };
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1341,8 +1367,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return -EOPNOTSUPP;
link_conf->eht_support = true;
- link_conf->eht_puncturing = params->punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
link_conf->eht_su_beamformer =
params->eht_cap->fixed.phy_cap_info[0] &
@@ -1370,7 +1394,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return err;
}
- err = ieee80211_link_use_channel(link, &params->chandef,
+ err = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (!err)
ieee80211_link_copy_chanctx_to_vlans(link, false);
@@ -1445,10 +1469,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
goto error;
err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
@@ -1463,7 +1486,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
kfree_rcu(old, rcu_head);
RCU_INIT_POINTER(link->u.ap.beacon, NULL);
- sdata->u.ap.active = false;
+
+ if (ieee80211_num_beaconing_links(sdata) == 0)
+ sdata->u.ap.active = false;
+
goto error;
}
@@ -1471,7 +1497,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID);
ieee80211_link_info_change_notify(sdata, link, changed);
- netif_carrier_on(dev);
+ if (ieee80211_num_beaconing_links(sdata) <= 1)
+ netif_carrier_on(dev);
+
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_on(vlan->dev);
@@ -1519,10 +1547,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
return err;
err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
@@ -1565,6 +1592,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_link_data *link =
sdata_dereference(sdata->link[link_id], sdata);
struct ieee80211_bss_conf *link_conf = link->conf;
+ LIST_HEAD(keys);
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1582,10 +1610,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
/* abort any running channel switch or color change */
link_conf->csa_active = false;
link_conf->color_change_active = false;
- if (link->csa_block_tx) {
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link->csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
ieee80211_free_next_beacon(link);
@@ -1593,10 +1621,13 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
/* turn off carrier for this interface and dependent VLANs */
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_off(vlan->dev);
- netif_carrier_off(dev);
+
+ if (ieee80211_num_beaconing_links(sdata) <= 1) {
+ netif_carrier_off(dev);
+ sdata->u.ap.active = false;
+ }
/* remove beacon and probe response */
- sdata->u.ap.active = false;
RCU_INIT_POINTER(link->u.ap.beacon, NULL);
RCU_INIT_POINTER(link->u.ap.probe_resp, NULL);
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
@@ -1618,8 +1649,13 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
link_conf->ema_ap = false;
link_conf->bssid_indicator = 0;
- __sta_info_flush(sdata, true);
- ieee80211_free_keys(sdata, true);
+ __sta_info_flush(sdata, true, link_id);
+
+ ieee80211_remove_link_keys(link, &keys);
+ if (!list_empty(&keys)) {
+ synchronize_net();
+ ieee80211_free_key_list(local, &keys);
+ }
link_conf->enable_beacon = false;
sdata->beacon_rate_set = false;
@@ -1629,7 +1665,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_BEACON_ENABLED);
if (sdata->wdev.cac_started) {
- chandef = link_conf->chandef;
+ chandef = link_conf->chanreq.oper;
wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
@@ -1829,7 +1865,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
if (params->supported_rates &&
params->supported_rates_len) {
- ieee80211_parse_bitrates(link->conf->chandef.width,
+ ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
sband, params->supported_rates,
params->supported_rates_len,
&link_sta->pub->supp_rates[sband->band]);
@@ -1944,6 +1980,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
clear_sta_flag(sta, WLAN_STA_TDLS_PEER);
}
+ if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU);
+
/* mark TDLS channel switch support, if the AP allows it */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
!sdata->deflink.u.mgd.tdls_chan_switch_prohibited &&
@@ -2095,7 +2134,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
if (params->mac)
return sta_info_destroy_addr_bss(sdata, params->mac);
- sta_info_flush(sdata);
+ sta_info_flush(sdata, params->link_id);
return 0;
}
@@ -2601,6 +2640,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
const struct mesh_setup *setup)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = setup->chandef };
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
int err;
@@ -2617,7 +2657,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
return err;
@@ -2660,7 +2700,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
return -EINVAL;
if (params->basic_rates) {
- if (!ieee80211_parse_bitrates(link->conf->chandef.width,
+ if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
wiphy->bands[sband->band],
params->basic_rates,
params->basic_rates_len,
@@ -2918,8 +2958,9 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
sizeof(int) * NUM_NL80211_BANDS);
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_MCAST_RATE);
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_MCAST_RATE);
return 0;
}
@@ -2997,6 +3038,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return -EOPNOTSUPP;
+
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (!sdata)
@@ -3059,7 +3103,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
if (has_monitor) {
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
sdata->deflink.user_power_level = local->user_power_level;
if (txp_type != sdata->vif.bss_conf.txpower_type)
update_txp_type = true;
@@ -3082,7 +3126,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy,
if (local->ops->get_txpower)
return drv_get_txpower(local, sdata, dbm);
- if (!local->use_chanctx)
+ if (local->emulate_chanctx)
*dbm = local->hw.conf.power_level;
else
*dbm = sdata->vif.bss_conf.txpower;
@@ -3152,8 +3196,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return -EINVAL;
- if (ieee80211_vif_is_mld(&sdata->vif) &&
- !(sdata->vif.active_links & BIT(link->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
return 0;
old_req = link->u.mgd.req_smps;
@@ -3175,7 +3218,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
* the new value until we associate.
*/
if (!sdata->u.mgd.associated ||
- link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT)
return 0;
ap = sdata->vif.cfg.ap_addr;
@@ -3206,7 +3249,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
if (err)
link->u.mgd.req_smps = old_req;
else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found)
- ieee80211_teardown_tdls_peers(sdata);
+ ieee80211_teardown_tdls_peers(link);
return err;
}
@@ -3253,33 +3296,57 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ s32 rssi_thold, u32 rssi_hyst,
+ s32 rssi_low, s32 rssi_high)
+{
+ struct ieee80211_bss_conf *conf;
+
+ if (!link || !link->conf)
+ return;
+
+ conf = link->conf;
+
+ if (rssi_thold && rssi_hyst &&
+ rssi_thold == conf->cqm_rssi_thold &&
+ rssi_hyst == conf->cqm_rssi_hyst)
+ return;
+
+ conf->cqm_rssi_thold = rssi_thold;
+ conf->cqm_rssi_hyst = rssi_hyst;
+ conf->cqm_rssi_low = rssi_low;
+ conf->cqm_rssi_high = rssi_high;
+ link->u.mgd.last_cqm_event_signal = 0;
+
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
+ return;
+
+ if (sdata->u.mgd.associated &&
+ (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+ ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM);
+}
+
static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
struct net_device *dev,
s32 rssi_thold, u32 rssi_hyst)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_vif *vif = &sdata->vif;
- struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
-
- if (rssi_thold == bss_conf->cqm_rssi_thold &&
- rssi_hyst == bss_conf->cqm_rssi_hyst)
- return 0;
+ int link_id;
- if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER &&
- !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+ if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER &&
+ !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
return -EOPNOTSUPP;
- bss_conf->cqm_rssi_thold = rssi_thold;
- bss_conf->cqm_rssi_hyst = rssi_hyst;
- bss_conf->cqm_rssi_low = 0;
- bss_conf->cqm_rssi_high = 0;
- sdata->deflink.u.mgd.last_cqm_event_signal = 0;
+ /* For MLD, handle CQM change on all the active links */
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
- /* tell the driver upon association, unless already associated */
- if (sdata->u.mgd.associated &&
- sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_CQM);
+ ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst,
+ 0, 0);
+ }
return 0;
}
@@ -3290,22 +3357,19 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_vif *vif = &sdata->vif;
- struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ int link_id;
- if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
+ if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER)
return -EOPNOTSUPP;
- bss_conf->cqm_rssi_low = rssi_low;
- bss_conf->cqm_rssi_high = rssi_high;
- bss_conf->cqm_rssi_thold = 0;
- bss_conf->cqm_rssi_hyst = 0;
- sdata->deflink.u.mgd.last_cqm_event_signal = 0;
+ /* For MLD, handle CQM change on all the active links */
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
- /* tell the driver upon association, unless already associated */
- if (sdata->u.mgd.associated &&
- sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_CQM);
+ ieee80211_set_cqm_rssi_link(sdata, link, 0, 0,
+ rssi_low, rssi_high);
+ }
return 0;
}
@@ -3330,9 +3394,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
* so at a basic rate so that all clients can receive it.
*/
if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) &&
- sdata->vif.bss_conf.chandef.chan) {
+ sdata->vif.bss_conf.chanreq.oper.chan) {
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band;
+ enum nl80211_band band;
+
+ band = sdata->vif.bss_conf.chanreq.oper.chan->band;
if (!(mask->control[band].legacy & basic_rates))
return -EINVAL;
@@ -3384,6 +3450,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
u32 cac_time_ms)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
struct ieee80211_local *local = sdata->local;
int err;
@@ -3398,7 +3465,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
goto out_unlock;
@@ -3541,13 +3608,24 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
return new_beacon;
}
-void ieee80211_csa_finish(struct ieee80211_vif *vif)
+void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
+
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return;
rcu_read_lock();
+ link_data = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link_data)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* TODO: MBSSID with MLO changes */
if (vif->mbssid_tx_vif == vif) {
/* Trigger ieee80211_csa_finish() on the non-transmitting
* interfaces when channel switch is received on
@@ -3566,7 +3644,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif)
&iter->deflink.csa_finalize_work);
}
}
- wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
+ wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work);
rcu_read_unlock();
}
@@ -3578,26 +3656,27 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- sdata->deflink.csa_block_tx = block_tx;
+ sdata->csa_blocked_queues = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work);
}
EXPORT_SYMBOL(ieee80211_channel_switch_disconnect);
-static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link_data->u.ap.next_beacon)
return -EINVAL;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
- sdata->deflink.u.ap.next_beacon,
+ err = ieee80211_assign_beacon(sdata, link_data,
+ link_data->u.ap.next_beacon,
NULL, NULL, changed);
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
if (err < 0)
return err;
@@ -3626,6 +3705,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
{
struct ieee80211_sub_if_data *sdata = link_data->sdata;
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss_conf *link_conf = link_data->conf;
u64 changed = 0;
int err;
@@ -3647,40 +3727,33 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
if (link_data->reserved_ready)
return 0;
- return ieee80211_link_use_reserved_context(&sdata->deflink);
+ return ieee80211_link_use_reserved_context(link_data);
}
- if (!cfg80211_chandef_identical(&link_data->conf->chandef,
- &link_data->csa_chandef))
+ if (!cfg80211_chandef_identical(&link_conf->chanreq.oper,
+ &link_data->csa_chanreq.oper))
return -EINVAL;
- sdata->vif.bss_conf.csa_active = false;
+ link_conf->csa_active = false;
- err = ieee80211_set_after_csa_beacon(sdata, &changed);
+ err = ieee80211_set_after_csa_beacon(link_data, &changed);
if (err)
return err;
- if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
- sdata->vif.bss_conf.eht_puncturing =
- sdata->vif.bss_conf.csa_punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-
ieee80211_link_info_change_notify(sdata, link_data, changed);
- if (link_data->csa_block_tx) {
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link_data->csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
err = drv_post_channel_switch(link_data);
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef,
- link_data->link_id,
- link_data->conf->eht_puncturing);
+ cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper,
+ link_data->link_id);
return 0;
}
@@ -3690,7 +3763,8 @@ static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
struct ieee80211_sub_if_data *sdata = link_data->sdata;
if (__ieee80211_csa_finalize(link_data)) {
- sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
+ sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n",
+ link_data->link_id);
cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
GFP_KERNEL);
}
@@ -3715,18 +3789,19 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work)
ieee80211_csa_finalize(link);
}
-static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data,
struct cfg80211_csa_settings *params,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
struct ieee80211_csa_settings csa = {};
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->deflink.u.ap.next_beacon =
+ link_data->u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_after);
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link_data->u.ap.next_beacon)
return -ENOMEM;
/*
@@ -3752,7 +3827,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
IEEE80211_MAX_CNTDWN_COUNTERS_NUM) ||
(params->n_counter_offsets_presp >
IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
return -EINVAL;
}
@@ -3762,11 +3837,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
csa.count = params->count;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ err = ieee80211_assign_beacon(sdata, link_data,
&params->beacon_csa, &csa,
NULL, changed);
if (err < 0) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link_data);
return err;
}
@@ -3813,7 +3888,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
/* changes into another band are not supported */
- if (sdata->vif.bss_conf.chandef.chan->band !=
+ if (sdata->vif.bss_conf.chanreq.oper.chan->band !=
params->chandef.chan->band)
return -EINVAL;
@@ -3847,13 +3922,13 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_color_change_abort(struct ieee80211_link_data *link)
{
- sdata->vif.bss_conf.color_change_active = false;
+ link->conf->color_change_active = false;
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
- cfg80211_color_change_aborted_notify(sdata->dev);
+ cfg80211_color_change_aborted_notify(link->sdata->dev, link->link_id);
}
static int
@@ -3861,11 +3936,17 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chan_req chanreq = { .oper = params->chandef };
struct ieee80211_local *local = sdata->local;
- struct ieee80211_channel_switch ch_switch;
+ struct ieee80211_channel_switch ch_switch = {
+ .link_id = params->link_id,
+ };
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_link_data *link_data;
u64 changed = 0;
+ u8 link_id = params->link_id;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3876,16 +3957,23 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (sdata->wdev.cac_started)
return -EBUSY;
- if (cfg80211_chandef_identical(&params->chandef,
- &sdata->vif.bss_conf.chandef))
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return -EINVAL;
+
+ link_data = wiphy_dereference(wiphy, sdata->link[link_id]);
+ if (!link_data)
+ return -ENOLINK;
+
+ link_conf = link_data->conf;
+
+ if (chanreq.oper.punctured && !link_conf->eht_support)
return -EINVAL;
/* don't allow another channel switch if one is already active. */
- if (sdata->vif.bss_conf.csa_active)
+ if (link_conf->csa_active)
return -EBUSY;
- conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
- lockdep_is_held(&local->hw.wiphy->mtx));
+ conf = wiphy_dereference(wiphy, link_conf->chanctx_conf);
if (!conf) {
err = -EBUSY;
goto out;
@@ -3902,14 +3990,14 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
ch_switch.timestamp = 0;
ch_switch.device_timestamp = 0;
ch_switch.block_tx = params->block_tx;
- ch_switch.chandef = params->chandef;
+ ch_switch.chandef = chanreq.oper;
ch_switch.count = params->count;
err = drv_pre_channel_switch(sdata, &ch_switch);
if (err)
goto out;
- err = ieee80211_link_reserve_chanctx(&sdata->deflink, &params->chandef,
+ err = ieee80211_link_reserve_chanctx(link_data, &chanreq,
chanctx->mode,
params->radar_required);
if (err)
@@ -3918,44 +4006,40 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
/* if reservation is invalid then this will fail */
err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0);
if (err) {
- ieee80211_link_unreserve_chanctx(&sdata->deflink);
+ ieee80211_link_unreserve_chanctx(link_data);
goto out;
}
/* if there is a color change in progress, abort it */
- if (sdata->vif.bss_conf.color_change_active)
- ieee80211_color_change_abort(sdata);
+ if (link_conf->color_change_active)
+ ieee80211_color_change_abort(link_data);
- err = ieee80211_set_csa_beacon(sdata, params, &changed);
+ err = ieee80211_set_csa_beacon(link_data, params, &changed);
if (err) {
- ieee80211_link_unreserve_chanctx(&sdata->deflink);
+ ieee80211_link_unreserve_chanctx(link_data);
goto out;
}
- if (params->punct_bitmap && !sdata->vif.bss_conf.eht_support)
- goto out;
+ link_data->csa_chanreq = chanreq;
+ link_conf->csa_active = true;
- sdata->deflink.csa_chandef = params->chandef;
- sdata->deflink.csa_block_tx = params->block_tx;
- sdata->vif.bss_conf.csa_active = true;
- sdata->vif.bss_conf.csa_punct_bitmap = params->punct_bitmap;
-
- if (sdata->deflink.csa_block_tx)
+ if (params->block_tx &&
+ !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_queues = true;
+ }
cfg80211_ch_switch_started_notify(sdata->dev,
- &sdata->deflink.csa_chandef, 0,
- params->count, params->block_tx,
- sdata->vif.bss_conf.csa_punct_bitmap);
+ &link_data->csa_chanreq.oper, link_id,
+ params->count, params->block_tx);
if (changed) {
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- changed);
- drv_channel_switch_beacon(sdata, &params->chandef);
+ ieee80211_link_info_change_notify(sdata, link_data, changed);
+ drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper);
} else {
/* if the beacon didn't change, we can finalize immediately */
- ieee80211_csa_finalize(&sdata->deflink);
+ ieee80211_csa_finalize(link_data);
}
out:
@@ -4205,15 +4289,12 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (chanctx_conf) {
- *chandef = link->conf->chandef;
+ *chandef = link->conf->chanreq.oper;
ret = 0;
} else if (local->open_count > 0 &&
local->open_count == local->monitors &&
sdata->vif.type == NL80211_IFTYPE_MONITOR) {
- if (local->use_chanctx)
- *chandef = local->monitor_chandef;
- else
- *chandef = local->_oper_chandef;
+ *chandef = local->monitor_chanreq.oper;
ret = 0;
}
out:
@@ -4261,12 +4342,13 @@ static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_link_data *link;
+ struct ieee80211_chan_req chanreq = { .oper = *chandef };
int ret;
u64 changed = 0;
link = sdata_dereference(sdata->link[link_id], sdata);
- ret = ieee80211_link_change_bandwidth(link, chandef, &changed);
+ ret = ieee80211_link_change_chanreq(link, &chanreq, &changed);
if (ret == 0)
ieee80211_link_info_change_notify(sdata, link, changed);
@@ -4588,20 +4670,22 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
}
static int
-ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ieee80211_set_after_color_change_beacon(struct ieee80211_link_data *link,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP: {
int ret;
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link->u.ap.next_beacon)
return -EINVAL;
- ret = ieee80211_assign_beacon(sdata, &sdata->deflink,
- sdata->deflink.u.ap.next_beacon,
+ ret = ieee80211_assign_beacon(sdata, link,
+ link->u.ap.next_beacon,
NULL, NULL, changed);
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
if (ret < 0)
return ret;
@@ -4617,18 +4701,19 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
}
static int
-ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ieee80211_set_color_change_beacon(struct ieee80211_link_data *link,
struct cfg80211_color_change_settings *params,
u64 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_color_change_settings color_change = {};
int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->deflink.u.ap.next_beacon =
+ link->u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_next);
- if (!sdata->deflink.u.ap.next_beacon)
+ if (!link->u.ap.next_beacon)
return -ENOMEM;
if (params->count <= 1)
@@ -4640,11 +4725,11 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
params->counter_offset_presp;
color_change.count = params->count;
- err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ err = ieee80211_assign_beacon(sdata, link,
&params->beacon_color_change,
NULL, &color_change, changed);
if (err < 0) {
- ieee80211_free_next_beacon(&sdata->deflink);
+ ieee80211_free_next_beacon(link);
return err;
}
break;
@@ -4656,16 +4741,18 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
}
static void
-ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
+ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link,
u8 color, int enable, u64 changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- sdata->vif.bss_conf.he_bss_color.color = color;
- sdata->vif.bss_conf.he_bss_color.enabled = enable;
+ link->conf->he_bss_color.color = color;
+ link->conf->he_bss_color.enabled = enable;
changed |= BSS_CHANGED_HE_BSS_COLOR;
- ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
+ ieee80211_link_info_change_notify(sdata, link, changed);
if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) {
struct ieee80211_sub_if_data *child;
@@ -4682,26 +4769,27 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
}
}
-static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_color_change_finalize(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
u64 changed = 0;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- sdata->vif.bss_conf.color_change_active = false;
+ link->conf->color_change_active = false;
- err = ieee80211_set_after_color_change_beacon(sdata, &changed);
+ err = ieee80211_set_after_color_change_beacon(link, &changed);
if (err) {
- cfg80211_color_change_aborted_notify(sdata->dev);
+ cfg80211_color_change_aborted_notify(sdata->dev, link->link_id);
return err;
}
- ieee80211_color_change_bss_config_notify(sdata,
- sdata->vif.bss_conf.color_change_color,
+ ieee80211_color_change_bss_config_notify(link,
+ link->conf->color_change_color,
1, changed);
- cfg80211_color_change_notify(sdata->dev);
+ cfg80211_color_change_notify(sdata->dev, link->link_id);
return 0;
}
@@ -4709,21 +4797,23 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
- deflink.color_change_finalize_work);
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
+ color_change_finalize_work);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
lockdep_assert_wiphy(local->hw.wiphy);
/* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.bss_conf.color_change_active)
+ if (!link_conf->color_change_active)
return;
if (!ieee80211_sdata_running(sdata))
return;
- ieee80211_color_change_finalize(sdata);
+ ieee80211_color_change_finalize(link);
}
void ieee80211_color_collision_detection_work(struct work_struct *work)
@@ -4734,30 +4824,60 @@ void ieee80211_color_collision_detection_work(struct work_struct *work)
color_collision_detect_work);
struct ieee80211_sub_if_data *sdata = link->sdata;
- cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap);
+ cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap,
+ link->link_id);
}
-void ieee80211_color_change_finish(struct ieee80211_vif *vif)
+void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
+
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return;
+
+ rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ return;
+ }
wiphy_work_queue(sdata->local->hw.wiphy,
- &sdata->deflink.color_change_finalize_work);
+ &link->color_change_finalize_work);
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap, gfp_t gfp)
+ u64 color_bitmap, u8 link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_link_data *link = &sdata->deflink;
+ struct ieee80211_link_data *link;
+
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return;
+
+ rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ return;
+ }
- if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active)
+ if (link->conf->color_change_active || link->conf->csa_active) {
+ rcu_read_unlock();
return;
+ }
- if (delayed_work_pending(&link->color_collision_detect_work))
+ if (delayed_work_pending(&link->color_collision_detect_work)) {
+ rcu_read_unlock();
return;
+ }
link->color_bitmap = color_bitmap;
/* queue the color collision detection event every 500 ms in order to
@@ -4766,6 +4886,8 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
ieee80211_queue_delayed_work(&sdata->local->hw,
&link->color_collision_detect_work,
msecs_to_jiffies(500));
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify);
@@ -4775,36 +4897,48 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_link_data *link;
+ u8 link_id = params->link_id;
u64 changed = 0;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- if (sdata->vif.bss_conf.nontransmitted)
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return -EINVAL;
+
+ link = wiphy_dereference(wiphy, sdata->link[link_id]);
+ if (!link)
+ return -ENOLINK;
+
+ link_conf = link->conf;
+
+ if (link_conf->nontransmitted)
return -EINVAL;
/* don't allow another color change if one is already active or if csa
* is active
*/
- if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) {
+ if (link_conf->color_change_active || link_conf->csa_active) {
err = -EBUSY;
goto out;
}
- err = ieee80211_set_color_change_beacon(sdata, params, &changed);
+ err = ieee80211_set_color_change_beacon(link, params, &changed);
if (err)
goto out;
- sdata->vif.bss_conf.color_change_active = true;
- sdata->vif.bss_conf.color_change_color = params->color;
+ link_conf->color_change_active = true;
+ link_conf->color_change_color = params->color;
- cfg80211_color_change_started_notify(sdata->dev, params->count);
+ cfg80211_color_change_started_notify(sdata->dev, params->count, link_id);
if (changed)
- ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed);
+ ieee80211_color_change_bss_config_notify(link, 0, 0, changed);
else
/* if the beacon didn't change, we can finalize immediately */
- ieee80211_color_change_finalize(sdata);
+ ieee80211_color_change_finalize(link);
out:
@@ -4967,6 +5101,17 @@ static int ieee80211_set_hw_timestamp(struct wiphy *wiphy,
return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts);
}
+static int
+ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ttlm_params *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ return ieee80211_req_neg_ttlm(sdata, params);
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -5079,4 +5224,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.mod_link_station = ieee80211_mod_link_station,
.del_link_station = ieee80211_del_link_station,
.set_hw_timestamp = ieee80211_set_hw_timestamp,
+ .set_ttlm = ieee80211_set_ttlm,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index ef4c2cebc0..e6a7ff6ca6 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 - channel management
- * Copyright 2020 - 2022 Intel Corporation
+ * Copyright 2020 - 2024 Intel Corporation
*/
#include <linux/nl80211.h>
@@ -81,87 +81,122 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link)
return container_of(conf, struct ieee80211_chanctx, conf);
}
-static const struct cfg80211_chan_def *
-ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
+bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b)
+{
+ if (!cfg80211_chandef_identical(&a->oper, &b->oper))
+ return false;
+ if (!a->ap.chan && !b->ap.chan)
+ return true;
+ return cfg80211_chandef_identical(&a->ap, &b->ap);
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct cfg80211_chan_def *compat;
+
+ if (a->ap.chan && b->ap.chan &&
+ !cfg80211_chandef_identical(&a->ap, &b->ap))
+ return NULL;
+
+ compat = cfg80211_chandef_compatible(&a->oper, &b->oper);
+ if (!compat)
+ return NULL;
+
+ /* Note: later code assumes this always fills & returns tmp if compat */
+ tmp->oper = *compat;
+ tmp->ap = a->ap.chan ? a->ap : b->ap;
+ return tmp;
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct ieee80211_chan_req *ret;
+ struct ieee80211_chan_req tmp2;
+
+ *tmp = (struct ieee80211_chan_req){
+ .oper = ctx->conf.def,
+ .ap = ctx->conf.ap,
+ };
+
+ ret = ieee80211_chanreq_compatible(tmp, req, &tmp2);
+ if (!ret)
+ return NULL;
+ *tmp = *ret;
+ return tmp;
+}
+
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
{
struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(link, &ctx->reserved_links,
- reserved_chanctx_list) {
- if (!compat)
- compat = &link->reserved_chandef;
+ if (WARN_ON(!req))
+ return NULL;
- compat = cfg80211_chandef_compatible(&link->reserved_chandef,
- compat);
- if (!compat)
+ list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) {
+ req = ieee80211_chanreq_compatible(&link->reserved, req, tmp);
+ if (!req)
break;
}
- return compat;
+ return req;
}
-static const struct cfg80211_chan_def *
+static const struct ieee80211_chan_req *
ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
+ const struct ieee80211_chan_req *compat,
+ struct ieee80211_chan_req *tmp)
{
struct ieee80211_link_data *link;
+ const struct ieee80211_chan_req *comp_def = compat;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(link, &ctx->assigned_links,
- assigned_chanctx_list) {
+ list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) {
struct ieee80211_bss_conf *link_conf = link->conf;
if (link->reserved_chanctx)
continue;
- if (!compat)
- compat = &link_conf->chandef;
-
- compat = cfg80211_chandef_compatible(
- &link_conf->chandef, compat);
- if (!compat)
+ comp_def = ieee80211_chanreq_compatible(&link_conf->chanreq,
+ comp_def, tmp);
+ if (!comp_def)
break;
}
- return compat;
-}
-
-static const struct cfg80211_chan_def *
-ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *compat)
-{
- lockdep_assert_wiphy(local->hw.wiphy);
-
- compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
- if (!compat)
- return NULL;
-
- compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat);
- if (!compat)
- return NULL;
-
- return compat;
+ return comp_def;
}
static bool
-ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *def)
+ieee80211_chanctx_can_reserve(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req)
{
+ struct ieee80211_chan_req tmp;
+
lockdep_assert_wiphy(local->hw.wiphy);
- if (ieee80211_chanctx_combined_chandef(local, ctx, def))
- return true;
+ if (!ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp))
+ return false;
+
+ if (!ieee80211_chanctx_non_reserved_chandef(local, ctx, req, &tmp))
+ return false;
if (!list_empty(&ctx->reserved_links) &&
- ieee80211_chanctx_reserved_chandef(local, ctx, def))
+ ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp))
return true;
return false;
@@ -169,7 +204,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
@@ -186,8 +221,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
- if (!ieee80211_chanctx_can_reserve_chandef(local, ctx,
- chandef))
+ if (!ieee80211_chanctx_can_reserve(local, ctx, chanreq))
continue;
return ctx;
@@ -202,7 +236,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta,
enum ieee80211_sta_rx_bandwidth width;
struct link_sta_info *link_sta;
- link_sta = rcu_dereference(sta->link[link_id]);
+ link_sta = wiphy_dereference(sta->local->hw.wiphy, sta->link[link_id]);
/* no effect if this STA has no presence on this link */
if (!link_sta)
@@ -240,9 +274,10 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta,
}
static enum nl80211_chan_width
-ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
- unsigned int link_id)
+ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ unsigned int link_id = link->link_id;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
@@ -258,31 +293,25 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
}
static enum nl80211_chan_width
-ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
+ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
- struct ieee80211_vif *vif = &sdata->vif;
- int link_id;
- rcu_read_lock();
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ for_each_sdata_link(local, link) {
enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT;
- struct ieee80211_link_data *link =
- rcu_dereference(sdata->link[link_id]);
-
- if (!link)
- continue;
if (link != rsvd_for &&
rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf)
continue;
- switch (vif->type) {
+ switch (link->sdata->vif.type) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
- width = ieee80211_get_max_required_bw(sdata, link_id);
+ width = ieee80211_get_max_required_bw(link);
break;
case NL80211_IFTYPE_STATION:
/*
@@ -290,8 +319,8 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
* point, so take the width from the chandef, but
* account also for TDLS peers
*/
- width = max(link->conf->chandef.width,
- ieee80211_get_max_required_bw(sdata, link_id));
+ width = max(link->conf->chanreq.oper.width,
+ ieee80211_get_max_required_bw(link));
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
@@ -299,7 +328,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
- width = link->conf->chandef.width;
+ width = link->conf->chanreq.oper.width;
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
@@ -312,40 +341,13 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
max_bw = max(max_bw, width);
}
- rcu_read_unlock();
-
- return max_bw;
-}
-
-static enum nl80211_chan_width
-ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- struct ieee80211_link_data *rsvd_for)
-{
- struct ieee80211_sub_if_data *sdata;
- enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- enum nl80211_chan_width width;
-
- if (!ieee80211_sdata_running(sdata))
- continue;
-
- width = ieee80211_get_chanctx_vif_max_required_bw(sdata, ctx,
- rsvd_for);
-
- max_bw = max(max_bw, width);
- }
/* use the configured bandwidth in case of monitor interface */
- sdata = rcu_dereference(local->monitor_sdata);
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
if (sdata &&
rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &ctx->conf)
max_bw = max(max_bw, ctx->conf.def.width);
- rcu_read_unlock();
-
return max_bw;
}
@@ -382,7 +384,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
/* downgrade chandef up to max_bw */
min_def = ctx->conf.def;
while (min_def.width > max_bw)
- ieee80211_chandef_downgrade(&min_def);
+ ieee80211_chandef_downgrade(&min_def, NULL);
if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def))
return 0;
@@ -395,7 +397,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
}
/* calling this function is assuming that station vif is updated to
- * lates changes by calling ieee80211_link_update_chandef
+ * latest changes by calling ieee80211_link_update_chanreq
*/
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
@@ -475,10 +477,15 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
static void _ieee80211_change_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
struct ieee80211_chanctx *old_ctx,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
struct ieee80211_link_data *rsvd_for)
{
- u32 changed;
+ const struct cfg80211_chan_def *chandef = &chanreq->oper;
+ struct ieee80211_chan_req ctx_req = {
+ .oper = ctx->conf.def,
+ .ap = ctx->conf.ap,
+ };
+ u32 changed = 0;
/* expected to handle only 20/40/80/160/320 channel widths */
switch (chandef->width) {
@@ -500,47 +507,54 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
*/
ieee80211_chan_bw_change(local, old_ctx, true);
- if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
+ if (ieee80211_chanreq_identical(&ctx_req, chanreq)) {
ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
return;
}
- WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ WARN_ON(ieee80211_chanctx_refcount(local, ctx) > 1 &&
+ !cfg80211_chandef_compatible(&ctx->conf.def, &chanreq->oper));
ieee80211_remove_wbrf(local, &ctx->conf.def);
+ if (!cfg80211_chandef_identical(&ctx->conf.def, &chanreq->oper)) {
+ if (ctx->conf.def.width != chanreq->oper.width)
+ changed |= IEEE80211_CHANCTX_CHANGE_WIDTH;
+ if (ctx->conf.def.punctured != chanreq->oper.punctured)
+ changed |= IEEE80211_CHANCTX_CHANGE_PUNCTURING;
+ }
+ if (!cfg80211_chandef_identical(&ctx->conf.ap, &chanreq->ap))
+ changed |= IEEE80211_CHANCTX_CHANGE_AP;
ctx->conf.def = *chandef;
+ ctx->conf.ap = chanreq->ap;
/* check if min chanctx also changed */
- changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
- _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+ changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
ieee80211_add_wbrf(local, &ctx->conf.def);
drv_change_chanctx(local, ctx, changed);
- if (!local->use_chanctx) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
- }
-
- /* check is BW wider */
+ /* check if BW is wider */
ieee80211_chan_bw_change(local, old_ctx, false);
}
static void ieee80211_change_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
struct ieee80211_chanctx *old_ctx,
- const struct cfg80211_chan_def *chandef)
+ const struct ieee80211_chan_req *chanreq)
{
- _ieee80211_change_chanctx(local, ctx, old_ctx, chandef, NULL);
+ _ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL);
}
+/* Note: if successful, the returned chanctx is reserved for the link */
static struct ieee80211_chanctx *
ieee80211_find_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
+ struct ieee80211_chan_req tmp;
struct ieee80211_chanctx *ctx;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -548,8 +562,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
return NULL;
+ if (WARN_ON(link->reserved_chanctx))
+ return NULL;
+
list_for_each_entry(ctx, &local->chanctx_list, list) {
- const struct cfg80211_chan_def *compat;
+ const struct ieee80211_chan_req *compat;
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACE_NONE)
continue;
@@ -557,15 +574,25 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
- compat = cfg80211_chandef_compatible(&ctx->conf.def, chandef);
+ compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp);
if (!compat)
continue;
- compat = ieee80211_chanctx_reserved_chandef(local, ctx,
- compat);
+ compat = ieee80211_chanctx_reserved_chanreq(local, ctx,
+ compat, &tmp);
if (!compat)
continue;
+ /*
+ * Reserve the chanctx temporarily, as the driver might change
+ * active links during callbacks we make into it below and/or
+ * later during assignment, which could (otherwise) cause the
+ * context to actually be removed.
+ */
+ link->reserved_chanctx = ctx;
+ list_add(&link->reserved_chanctx_list,
+ &ctx->reserved_links);
+
ieee80211_change_chanctx(local, ctx, ctx, compat);
return ctx;
@@ -576,26 +603,14 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
bool ieee80211_is_radar_required(struct ieee80211_local *local)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- unsigned int link_id;
-
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
-
- if (link && link->radar_required) {
- rcu_read_unlock();
- return true;
- }
- }
+ for_each_sdata_link(local, link) {
+ if (link->radar_required)
+ return true;
}
- rcu_read_unlock();
return false;
}
@@ -605,43 +620,24 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
struct ieee80211_chanctx_conf *conf = &ctx->conf;
- struct ieee80211_sub_if_data *sdata;
- bool required = false;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- unsigned int link_id;
-
- if (!ieee80211_sdata_running(sdata))
+ for_each_sdata_link(local, link) {
+ if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
continue;
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
- if (!link)
- continue;
-
- if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
- continue;
- if (!link->radar_required)
- continue;
- required = true;
- break;
- }
-
- if (required)
- break;
+ if (!link->radar_required)
+ continue;
+ return true;
}
- rcu_read_unlock();
- return required;
+ return false;
}
static struct ieee80211_chanctx *
ieee80211_alloc_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
@@ -654,7 +650,8 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
INIT_LIST_HEAD(&ctx->assigned_links);
INIT_LIST_HEAD(&ctx->reserved_links);
- ctx->conf.def = *chandef;
+ ctx->conf.def = chanreq->oper;
+ ctx->conf.ap = chanreq->ap;
ctx->conf.rx_chains_static = 1;
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
@@ -674,23 +671,15 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
ieee80211_add_wbrf(local, &ctx->conf.def);
- if (!local->use_chanctx)
- local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
-
/* turn idle off *before* setting channel -- some drivers need that */
changed = ieee80211_idle_off(local);
if (changed)
ieee80211_hw_config(local, changed);
- if (!local->use_chanctx) {
- local->_oper_chandef = ctx->conf.def;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- } else {
- err = drv_add_chanctx(local, ctx);
- if (err) {
- ieee80211_recalc_idle(local);
- return err;
- }
+ err = drv_add_chanctx(local, ctx);
+ if (err) {
+ ieee80211_recalc_idle(local);
+ return err;
}
return 0;
@@ -698,74 +687,55 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
static struct ieee80211_chanctx *
ieee80211_new_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode)
+ const struct ieee80211_chan_req *chanreq,
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure)
{
struct ieee80211_chanctx *ctx;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
if (!ctx)
return ERR_PTR(-ENOMEM);
err = ieee80211_add_chanctx(local, ctx);
- if (err) {
+ if (!assign_on_failure && err) {
kfree(ctx);
return ERR_PTR(err);
}
+ /* We ignored a driver error, see _ieee80211_set_active_links */
+ WARN_ON_ONCE(err && !local->in_reconfig);
list_add_rcu(&ctx->list, &local->chanctx_list);
return ctx;
}
static void ieee80211_del_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool skip_idle_recalc)
{
lockdep_assert_wiphy(local->hw.wiphy);
- if (!local->use_chanctx) {
- struct cfg80211_chan_def *chandef = &local->_oper_chandef;
- /* S1G doesn't have 20MHz, so get the correct width for the
- * current channel.
- */
- if (chandef->chan->band == NL80211_BAND_S1GHZ)
- chandef->width =
- ieee80211_s1g_channel_width(chandef->chan);
- else
- chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef->center_freq1 = chandef->chan->center_freq;
- chandef->freq1_offset = chandef->chan->freq_offset;
- chandef->center_freq2 = 0;
-
- /* NOTE: Disabling radar is only valid here for
- * single channel context. To be sure, check it ...
- */
- WARN_ON(local->hw.conf.radar_enabled &&
- !list_empty(&local->chanctx_list));
-
- local->hw.conf.radar_enabled = false;
-
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- } else {
- drv_remove_chanctx(local, ctx);
- }
+ drv_remove_chanctx(local, ctx);
- ieee80211_recalc_idle(local);
+ if (!skip_idle_recalc)
+ ieee80211_recalc_idle(local);
ieee80211_remove_wbrf(local, &ctx->conf.def);
}
static void ieee80211_free_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool skip_idle_recalc)
{
lockdep_assert_wiphy(local->hw.wiphy);
WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
list_del_rcu(&ctx->list);
- ieee80211_del_chanctx(local, ctx);
+ ieee80211_del_chanctx(local, ctx, skip_idle_recalc);
kfree_rcu(ctx, rcu_head);
}
@@ -773,64 +743,64 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
struct ieee80211_chanctx_conf *conf = &ctx->conf;
- struct ieee80211_sub_if_data *sdata;
- const struct cfg80211_chan_def *compat = NULL;
+ const struct ieee80211_chan_req *compat = NULL;
+ struct ieee80211_link_data *link;
+ struct ieee80211_chan_req tmp;
struct sta_info *sta;
lockdep_assert_wiphy(local->hw.wiphy);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- int link_id;
-
- if (!ieee80211_sdata_running(sdata))
- continue;
+ for_each_sdata_link(local, link) {
+ struct ieee80211_bss_conf *link_conf;
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (link->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
continue;
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_bss_conf *link_conf =
- rcu_dereference(sdata->vif.link_conf[link_id]);
-
- if (!link_conf)
- continue;
+ link_conf = link->conf;
- if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
- continue;
+ if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
+ continue;
- if (!compat)
- compat = &link_conf->chandef;
+ if (!compat)
+ compat = &link_conf->chanreq;
- compat = cfg80211_chandef_compatible(&link_conf->chandef,
- compat);
- if (WARN_ON_ONCE(!compat))
- break;
- }
+ compat = ieee80211_chanreq_compatible(&link_conf->chanreq,
+ compat, &tmp);
+ if (WARN_ON_ONCE(!compat))
+ return;
}
- if (WARN_ON_ONCE(!compat)) {
- rcu_read_unlock();
+ if (WARN_ON_ONCE(!compat))
return;
- }
/* TDLS peers can sometimes affect the chandef width */
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_chan_req tdls_chanreq = {};
+ int tdls_link_id;
+
if (!sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
!sta->tdls_chandef.chan)
continue;
- compat = cfg80211_chandef_compatible(&sta->tdls_chandef,
- compat);
+ tdls_link_id = ieee80211_tdls_sta_link_id(sta);
+ link = sdata_dereference(sdata->link[tdls_link_id], sdata);
+ if (!link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
+ continue;
+
+ tdls_chanreq.oper = sta->tdls_chandef;
+
+ /* note this always fills and returns &tmp if compat */
+ compat = ieee80211_chanreq_compatible(&tdls_chanreq,
+ compat, &tmp);
if (WARN_ON_ONCE(!compat))
- break;
+ return;
}
- rcu_read_unlock();
-
- if (!compat)
- return;
ieee80211_change_chanctx(local, ctx, ctx, compat);
}
@@ -849,22 +819,19 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
chanctx->conf.radar_enabled = radar_enabled;
- if (!local->use_chanctx) {
- local->hw.conf.radar_enabled = chanctx->conf.radar_enabled;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- }
-
drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
}
static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
- struct ieee80211_chanctx *new_ctx)
+ struct ieee80211_chanctx *new_ctx,
+ bool assign_on_failure)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *curr_ctx = NULL;
- int ret = 0;
+ bool new_idle;
+ int ret;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
return -EOPNOTSUPP;
@@ -885,19 +852,22 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
ieee80211_recalc_chanctx_min_def(local, new_ctx, link);
ret = drv_assign_vif_chanctx(local, sdata, link->conf, new_ctx);
- if (ret)
- goto out;
-
- conf = &new_ctx->conf;
- list_add(&link->assigned_chanctx_list,
- &new_ctx->assigned_links);
+ if (assign_on_failure || !ret) {
+ /* Need to continue, see _ieee80211_set_active_links */
+ WARN_ON_ONCE(ret && !local->in_reconfig);
+ ret = 0;
+
+ /* succeeded, so commit it to the data structures */
+ conf = &new_ctx->conf;
+ list_add(&link->assigned_chanctx_list,
+ &new_ctx->assigned_links);
+ }
+ } else {
+ ret = 0;
}
-out:
rcu_assign_pointer(link->conf->chanctx_conf, conf);
- sdata->vif.cfg.idle = !conf;
-
if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
ieee80211_recalc_smps_chanctx(local, curr_ctx);
@@ -910,9 +880,27 @@ out:
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
}
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ if (conf) {
+ new_idle = false;
+ } else {
+ struct ieee80211_link_data *tmp;
+
+ new_idle = true;
+ for_each_sdata_link(local, tmp) {
+ if (rcu_access_pointer(tmp->conf->chanctx_conf)) {
+ new_idle = false;
+ break;
+ }
+ }
+ }
+
+ if (new_idle != sdata->vif.cfg.idle) {
+ sdata->vif.cfg.idle = new_idle;
+
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_MONITOR)
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ }
ieee80211_check_fast_xmit_iface(sdata);
@@ -924,23 +912,19 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
{
struct ieee80211_sub_if_data *sdata;
u8 rx_chains_static, rx_chains_dynamic;
+ struct ieee80211_link_data *link;
lockdep_assert_wiphy(local->hw.wiphy);
rx_chains_static = 1;
rx_chains_dynamic = 1;
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ for_each_sdata_link(local, link) {
u8 needed_static, needed_dynamic;
- unsigned int link_id;
-
- if (!ieee80211_sdata_running(sdata))
- continue;
- switch (sdata->vif.type) {
+ switch (link->sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- if (!sdata->u.mgd.associated)
+ if (!link->sdata->u.mgd.associated)
continue;
break;
case NL80211_IFTYPE_AP:
@@ -952,59 +936,38 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
continue;
}
- for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(sdata->link[link_id]);
-
- if (!link)
- continue;
-
- if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
- continue;
-
- switch (link->smps_mode) {
- default:
- WARN_ONCE(1, "Invalid SMPS mode %d\n",
- link->smps_mode);
- fallthrough;
- case IEEE80211_SMPS_OFF:
- needed_static = link->needed_rx_chains;
- needed_dynamic = link->needed_rx_chains;
- break;
- case IEEE80211_SMPS_DYNAMIC:
- needed_static = 1;
- needed_dynamic = link->needed_rx_chains;
- break;
- case IEEE80211_SMPS_STATIC:
- needed_static = 1;
- needed_dynamic = 1;
- break;
- }
+ if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
+ continue;
- rx_chains_static = max(rx_chains_static, needed_static);
- rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
+ switch (link->smps_mode) {
+ default:
+ WARN_ONCE(1, "Invalid SMPS mode %d\n",
+ link->smps_mode);
+ fallthrough;
+ case IEEE80211_SMPS_OFF:
+ needed_static = link->needed_rx_chains;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_DYNAMIC:
+ needed_static = 1;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_STATIC:
+ needed_static = 1;
+ needed_dynamic = 1;
+ break;
}
+
+ rx_chains_static = max(rx_chains_static, needed_static);
+ rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
}
/* Disable SMPS for the monitor interface */
- sdata = rcu_dereference(local->monitor_sdata);
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
if (sdata &&
rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &chanctx->conf)
rx_chains_dynamic = rx_chains_static = local->rx_chains;
- rcu_read_unlock();
-
- if (!local->use_chanctx) {
- if (rx_chains_static > 1)
- local->smps_mode = IEEE80211_SMPS_OFF;
- else if (rx_chains_dynamic > 1)
- local->smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
- local->smps_mode = IEEE80211_SMPS_STATIC;
- ieee80211_hw_config(local, 0);
- }
-
if (rx_chains_static == chanctx->conf.rx_chains_static &&
rx_chains_dynamic == chanctx->conf.rx_chains_dynamic)
return;
@@ -1043,17 +1006,16 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
if (clear)
conf = NULL;
- rcu_read_lock();
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
struct ieee80211_bss_conf *vlan_conf;
- vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ vlan_conf = wiphy_dereference(local->hw.wiphy,
+ vlan->vif.link_conf[link_id]);
if (WARN_ON(!vlan_conf))
continue;
rcu_assign_pointer(vlan_conf->chanctx_conf, conf);
}
- rcu_read_unlock();
}
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
@@ -1095,7 +1057,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
list_del_rcu(&ctx->list);
kfree_rcu(ctx, rcu_head);
} else {
- ieee80211_free_chanctx(sdata->local, ctx);
+ ieee80211_free_chanctx(sdata->local, ctx, false);
}
}
@@ -1103,7 +1065,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
}
int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *chanreq,
enum ieee80211_chanctx_mode mode,
bool radar_required)
{
@@ -1114,13 +1076,14 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
lockdep_assert_wiphy(local->hw.wiphy);
curr_ctx = ieee80211_link_get_chanctx(link);
- if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
+ if (curr_ctx && !local->ops->switch_vif_chanctx)
return -EOPNOTSUPP;
- new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode);
if (!new_ctx) {
if (ieee80211_can_create_new_chanctx(local)) {
- new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_new_chanctx(local, chanreq, mode,
+ false);
if (IS_ERR(new_ctx))
return PTR_ERR(new_ctx);
} else {
@@ -1174,7 +1137,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
!list_empty(&curr_ctx->reserved_links))
return -EBUSY;
- new_ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode);
if (!new_ctx)
return -ENOMEM;
@@ -1192,7 +1155,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links);
link->reserved_chanctx = new_ctx;
- link->reserved_chandef = *chandef;
+ link->reserved = *chanreq;
link->reserved_radar_required = radar_required;
link->reserved_ready = false;
@@ -1231,29 +1194,28 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
}
static void
-ieee80211_link_update_chandef(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef)
+ieee80211_link_update_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
unsigned int link_id = link->link_id;
struct ieee80211_sub_if_data *vlan;
- link->conf->chandef = *chandef;
+ link->conf->chanreq = *chanreq;
if (sdata->vif.type != NL80211_IFTYPE_AP)
return;
- rcu_read_lock();
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
struct ieee80211_bss_conf *vlan_conf;
- vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ vlan_conf = wiphy_dereference(sdata->local->hw.wiphy,
+ vlan->vif.link_conf[link_id]);
if (WARN_ON(!vlan_conf))
continue;
- vlan_conf->chandef = *chandef;
+ vlan_conf->chanreq = *chanreq;
}
- rcu_read_unlock();
}
static int
@@ -1264,7 +1226,8 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
struct ieee80211_local *local = sdata->local;
struct ieee80211_vif_chanctx_switch vif_chsw[1] = {};
struct ieee80211_chanctx *old_ctx, *new_ctx;
- const struct cfg80211_chan_def *chandef;
+ const struct ieee80211_chan_req *chanreq;
+ struct ieee80211_chan_req tmp;
u64 changed = 0;
int err;
@@ -1286,17 +1249,18 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
IEEE80211_CHANCTX_REPLACES_OTHER))
return -EINVAL;
- chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &link->reserved_chandef);
- if (WARN_ON(!chandef))
+ chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &link->reserved,
+ &tmp);
+ if (WARN_ON(!chanreq))
return -EINVAL;
- if (link_conf->chandef.width != link->reserved_chandef.width)
+ if (link_conf->chanreq.oper.width != link->reserved.oper.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_link_update_chandef(link, &link->reserved_chandef);
+ ieee80211_link_update_chanreq(link, &link->reserved);
- _ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef, link);
+ _ieee80211_change_chanctx(local, new_ctx, old_ctx, chanreq, link);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1310,7 +1274,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
CHANCTX_SWMODE_REASSIGN_VIF);
if (err) {
if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
- ieee80211_free_chanctx(local, new_ctx);
+ ieee80211_free_chanctx(local, new_ctx, false);
goto out;
}
@@ -1324,7 +1288,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
ieee80211_check_fast_xmit_iface(sdata);
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
- ieee80211_free_chanctx(local, old_ctx);
+ ieee80211_free_chanctx(local, old_ctx, false);
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
ieee80211_recalc_smps_chanctx(local, new_ctx);
@@ -1344,7 +1308,8 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *old_ctx, *new_ctx;
- const struct cfg80211_chan_def *chandef;
+ const struct ieee80211_chan_req *chanreq;
+ struct ieee80211_chan_req tmp;
int err;
old_ctx = ieee80211_link_get_chanctx(link);
@@ -1363,20 +1328,21 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
IEEE80211_CHANCTX_REPLACES_OTHER))
return -EINVAL;
- chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &link->reserved_chandef);
- if (WARN_ON(!chandef))
+ chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &link->reserved,
+ &tmp);
+ if (WARN_ON(!chanreq))
return -EINVAL;
- ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
+ ieee80211_change_chanctx(local, new_ctx, new_ctx, chanreq);
list_del(&link->reserved_chanctx_list);
link->reserved_chanctx = NULL;
- err = ieee80211_assign_link_chanctx(link, new_ctx);
+ err = ieee80211_assign_link_chanctx(link, new_ctx, false);
if (err) {
if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
- ieee80211_free_chanctx(local, new_ctx);
+ ieee80211_free_chanctx(local, new_ctx, false);
goto out;
}
@@ -1412,24 +1378,6 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link)
return true;
}
-static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local,
- struct ieee80211_chanctx *new_ctx)
-{
- const struct cfg80211_chan_def *chandef;
-
- lockdep_assert_wiphy(local->hw.wiphy);
-
- chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL);
- if (WARN_ON(!chandef))
- return -EINVAL;
-
- local->hw.conf.radar_enabled = new_ctx->conf.radar_enabled;
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
-
- return 0;
-}
-
static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
int n_vifs)
{
@@ -1491,7 +1439,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local)
if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
- ieee80211_del_chanctx(local, ctx->replace_ctx);
+ ieee80211_del_chanctx(local, ctx->replace_ctx, false);
err = ieee80211_add_chanctx(local, ctx);
if (err)
goto err;
@@ -1508,7 +1456,7 @@ err:
if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
- ieee80211_del_chanctx(local, ctx);
+ ieee80211_del_chanctx(local, ctx, false);
WARN_ON(ieee80211_add_chanctx(local, ctx->replace_ctx));
}
@@ -1518,7 +1466,6 @@ err:
static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
{
struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx;
- struct ieee80211_chanctx *new_ctx = NULL;
int err, n_assigned, n_reserved, n_ready;
int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0;
@@ -1551,9 +1498,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
goto err;
}
- if (!local->use_chanctx)
- new_ctx = ctx;
-
n_ctx++;
n_assigned = 0;
@@ -1607,9 +1551,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
if (WARN_ON(n_ctx == 0) ||
WARN_ON(n_vifs_switch == 0 &&
n_vifs_assign == 0 &&
- n_vifs_ctxless == 0) ||
- WARN_ON(n_ctx > 1 && !local->use_chanctx) ||
- WARN_ON(!new_ctx && !local->use_chanctx)) {
+ n_vifs_ctxless == 0)) {
err = -EINVAL;
goto err;
}
@@ -1619,20 +1561,14 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
* reservations and driver capabilities.
*/
- if (local->use_chanctx) {
- if (n_vifs_switch > 0) {
- err = ieee80211_chsw_switch_vifs(local, n_vifs_switch);
- if (err)
- goto err;
- }
+ if (n_vifs_switch > 0) {
+ err = ieee80211_chsw_switch_vifs(local, n_vifs_switch);
+ if (err)
+ goto err;
+ }
- if (n_vifs_assign > 0 || n_vifs_ctxless > 0) {
- err = ieee80211_chsw_switch_ctxs(local);
- if (err)
- goto err;
- }
- } else {
- err = ieee80211_chsw_switch_hwconf(local, new_ctx);
+ if (n_vifs_assign > 0 || n_vifs_ctxless > 0) {
+ err = ieee80211_chsw_switch_ctxs(local);
if (err)
goto err;
}
@@ -1672,10 +1608,10 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
link->radar_required = link->reserved_radar_required;
- if (link_conf->chandef.width != link->reserved_chandef.width)
+ if (link_conf->chanreq.oper.width != link->reserved.oper.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_link_update_chandef(link, &link->reserved_chandef);
+ ieee80211_link_update_chanreq(link, &link->reserved);
if (changed)
ieee80211_link_info_change_notify(sdata,
link,
@@ -1772,7 +1708,8 @@ err:
return err;
}
-static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
+void __ieee80211_link_release_channel(struct ieee80211_link_data *link,
+ bool skip_idle_recalc)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
@@ -1798,9 +1735,9 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
ieee80211_link_unreserve_chanctx(link);
}
- ieee80211_assign_link_chanctx(link, NULL);
+ ieee80211_assign_link_chanctx(link, NULL, false);
if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
+ ieee80211_free_chanctx(local, ctx, skip_idle_recalc);
link->radar_required = false;
@@ -1809,56 +1746,69 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
ieee80211_vif_use_reserved_switch(local);
}
-int ieee80211_link_use_channel(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode)
+int _ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq,
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *ctx;
u8 radar_detect_width = 0;
+ bool reserved = false;
int ret;
lockdep_assert_wiphy(local->hw.wiphy);
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link->link_id))) {
- ieee80211_link_update_chandef(link, chandef);
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) {
+ ieee80211_link_update_chanreq(link, chanreq);
return 0;
}
ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
- chandef,
+ &chanreq->oper,
sdata->wdev.iftype);
if (ret < 0)
goto out;
if (ret > 0)
- radar_detect_width = BIT(chandef->width);
+ radar_detect_width = BIT(chanreq->oper.width);
link->radar_required = ret;
- ret = ieee80211_check_combinations(sdata, chandef, mode,
+ ret = ieee80211_check_combinations(sdata, &chanreq->oper, mode,
radar_detect_width);
if (ret < 0)
goto out;
- __ieee80211_link_release_channel(link);
+ __ieee80211_link_release_channel(link, false);
- ctx = ieee80211_find_chanctx(local, chandef, mode);
- if (!ctx)
- ctx = ieee80211_new_chanctx(local, chandef, mode);
+ ctx = ieee80211_find_chanctx(local, link, chanreq, mode);
+ /* Note: context is now reserved */
+ if (ctx)
+ reserved = true;
+ else
+ ctx = ieee80211_new_chanctx(local, chanreq, mode,
+ assign_on_failure);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto out;
}
- ieee80211_link_update_chandef(link, chandef);
+ ieee80211_link_update_chanreq(link, chanreq);
+
+ ret = ieee80211_assign_link_chanctx(link, ctx, assign_on_failure);
+
+ if (reserved) {
+ /* remove reservation */
+ WARN_ON(link->reserved_chanctx != ctx);
+ link->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ }
- ret = ieee80211_assign_link_chanctx(link, ctx);
if (ret) {
/* if assign fails refcount stays the same */
if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
+ ieee80211_free_chanctx(local, ctx, false);
goto out;
}
@@ -1932,28 +1882,79 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link)
return 0;
}
-int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- u64 *changed)
+/*
+ * This is similar to ieee80211_chanctx_compatible(), but rechecks
+ * against all the links actually using it (except the one that's
+ * passed, since that one is changing).
+ * This is done in order to allow changes to the AP's bandwidth for
+ * wider bandwidth OFDMA purposes, which wouldn't be treated as
+ * compatible by ieee80211_chanctx_recheck() but is OK if the link
+ * requesting the update is the only one using it.
+ */
+static const struct ieee80211_chan_req *
+ieee80211_chanctx_recheck(struct ieee80211_local *local,
+ struct ieee80211_link_data *skip_link,
+ struct ieee80211_chanctx *ctx,
+ const struct ieee80211_chan_req *req,
+ struct ieee80211_chan_req *tmp)
+{
+ const struct ieee80211_chan_req *ret = req;
+ struct ieee80211_link_data *link;
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ for_each_sdata_link(local, link) {
+ if (link == skip_link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) == &ctx->conf) {
+ ret = ieee80211_chanreq_compatible(ret,
+ &link->conf->chanreq,
+ tmp);
+ if (!ret)
+ return NULL;
+ }
+
+ if (link->reserved_chanctx == ctx) {
+ ret = ieee80211_chanreq_compatible(ret,
+ &link->reserved,
+ tmp);
+ if (!ret)
+ return NULL;
+ }
+ }
+
+ *tmp = *ret;
+ return tmp;
+}
+
+int ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *chanreq,
+ u64 *changed)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
- const struct cfg80211_chan_def *compat;
+ const struct ieee80211_chan_req *compat;
+ struct ieee80211_chan_req tmp;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy,
+ &chanreq->oper,
IEEE80211_CHAN_DISABLED))
return -EINVAL;
- if (cfg80211_chandef_identical(chandef, &link_conf->chandef))
+ /* for non-HT 20 MHz the rest doesn't matter */
+ if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT &&
+ cfg80211_chandef_identical(&chanreq->oper, &link_conf->chanreq.oper))
return 0;
- if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
- link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ /* but you cannot switch to/from it */
+ if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ link_conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT)
return -EINVAL;
conf = rcu_dereference_protected(link_conf->chanctx_conf,
@@ -1963,13 +1964,14 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- compat = cfg80211_chandef_compatible(&conf->def, chandef);
+ compat = ieee80211_chanctx_recheck(local, link, ctx, chanreq, &tmp);
if (!compat)
return -EINVAL;
switch (ctx->replace_state) {
case IEEE80211_CHANCTX_REPLACE_NONE:
- if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat))
+ if (!ieee80211_chanctx_reserved_chanreq(local, ctx, compat,
+ &tmp))
return -EBUSY;
break;
case IEEE80211_CHANCTX_WILL_BE_REPLACED:
@@ -1984,7 +1986,7 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
break;
}
- ieee80211_link_update_chandef(link, chandef);
+ ieee80211_link_update_chanreq(link, chanreq);
ieee80211_recalc_chanctx_chantype(local, ctx);
@@ -1999,7 +2001,7 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (rcu_access_pointer(link->conf->chanctx_conf))
- __ieee80211_link_release_channel(link);
+ __ieee80211_link_release_channel(link, false);
}
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
@@ -2019,12 +2021,11 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
- rcu_read_lock();
- ap_conf = rcu_dereference(ap->vif.link_conf[link_id]);
- conf = rcu_dereference_protected(ap_conf->chanctx_conf,
- lockdep_is_held(&local->hw.wiphy->mtx));
+ ap_conf = wiphy_dereference(local->hw.wiphy,
+ ap->vif.link_conf[link_id]);
+ conf = wiphy_dereference(local->hw.wiphy,
+ ap_conf->chanctx_conf);
rcu_assign_pointer(link_conf->chanctx_conf, conf);
- rcu_read_unlock();
}
void ieee80211_iter_chan_contexts_atomic(
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index d49894df23..35a8ba25fa 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -152,16 +152,17 @@ do { \
else \
_sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \
} while (0)
-#define link_dbg(link, fmt, ...) \
+#define _link_id_dbg(print, sdata, link_id, fmt, ...) \
do { \
- if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \
- _sdata_dbg(1, (link)->sdata, "[link %d] " fmt, \
- (link)->link_id, \
- ##__VA_ARGS__); \
+ if (ieee80211_vif_is_mld(&(sdata)->vif)) \
+ _sdata_dbg(print, sdata, "[link %d] " fmt, \
+ link_id, ##__VA_ARGS__); \
else \
- _sdata_dbg(1, (link)->sdata, fmt, \
- ##__VA_ARGS__); \
+ _sdata_dbg(print, sdata, fmt, ##__VA_ARGS__); \
} while (0)
+#define link_dbg(link, fmt, ...) \
+ _link_id_dbg(1, (link)->sdata, (link)->link_id, \
+ fmt, ##__VA_ARGS__)
#define ht_dbg(sdata, fmt, ...) \
_sdata_dbg(MAC80211_HT_DEBUG, \
@@ -226,6 +227,9 @@ do { \
#define mlme_dbg(sdata, fmt, ...) \
_sdata_dbg(MAC80211_MLME_DEBUG, \
sdata, fmt, ##__VA_ARGS__)
+#define mlme_link_id_dbg(sdata, link_id, fmt, ...) /* MLME debug print for a given link ID; forwards to _link_id_dbg() with MAC80211_MLME_DEBUG as the print switch */ \
+ _link_id_dbg(MAC80211_MLME_DEBUG, sdata, link_id, \
+ fmt, ##__VA_ARGS__)
#define mlme_dbg_ratelimited(sdata, fmt, ...) \
_sdata_dbg(MAC80211_MLME_DEBUG && net_ratelimit(), \
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 74be49191e..98310188f3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -498,6 +498,8 @@ static const char *hw_flag_names[] = {
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
FLAG(DISALLOW_PUNCTURING),
+ FLAG(DISALLOW_PUNCTURING_5GHZ),
+ FLAG(HANDLES_QUIET_CSA),
#undef FLAG
};
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 3b7f70073f..254d745832 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2015 Intel Deutschland GmbH
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include <net/mac80211.h>
#include "ieee80211_i.h"
@@ -214,8 +214,7 @@ int drv_conf_tx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id))
return 0;
if (params->cw_min == 0 || params->cw_min > params->cw_max) {
@@ -312,11 +311,22 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ /*
+ * We should perhaps push emulate chanctx down and only
+ * make it call ->config() when the chanctx is actually
+ * assigned here (and unassigned below), but that's yet
+ * another change to all drivers to add assign/unassign
+ * emulation callbacks. Maybe later.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return 0;
+
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id))
return 0;
trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx);
@@ -340,11 +350,15 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ local->emulate_chanctx &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id))
return;
trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx);
@@ -461,8 +475,7 @@ void drv_link_info_changed(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return;
- if (sdata->vif.active_links &&
- !(sdata->vif.active_links & BIT(link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
return;
trace_drv_link_info_changed(local, sdata, info, changed);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index eb482fb8c3..5d078c0a23 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019, 2021 - 2023 Intel Corporation
+* Copyright (C) 2018-2019, 2021-2024 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -1180,8 +1180,9 @@ drv_post_channel_switch(struct ieee80211_link_data *link)
}
static inline void
-drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
+drv_abort_channel_switch(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
might_sleep();
@@ -1193,7 +1194,8 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
trace_drv_abort_channel_switch(local, sdata);
if (local->ops->abort_channel_switch)
- local->ops->abort_channel_switch(&local->hw, &sdata->vif);
+ local->ops->abort_channel_switch(&local->hw, &sdata->vif,
+ link->conf);
}
static inline void
@@ -1695,4 +1697,23 @@ int drv_change_sta_links(struct ieee80211_local *local,
struct ieee80211_sta *sta,
u16 old_links, u16 new_links);
+/*
+ * Ask the driver whether a negotiated TID-to-link mapping is acceptable.
+ *
+ * Returns the result of the driver's ->can_neg_ttlm() callback.
+ * NEG_TTLM_RES_REJECT is returned when the driver does not implement the
+ * callback or when check_sdata_in_driver() fails for @sdata.
+ */
+static inline enum ieee80211_neg_ttlm_res
+drv_can_neg_ttlm(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_neg_ttlm *neg_ttlm)
+{
+	enum ieee80211_neg_ttlm_res res = NEG_TTLM_RES_REJECT;
+
+	might_sleep();
+	/*
+	 * The return type is an enum, not an errno: returning -EIO here
+	 * would hand the caller a value outside the enum, so reject instead.
+	 */
+	if (!check_sdata_in_driver(sdata))
+		return res;
+
+	trace_drv_can_neg_ttlm(local, sdata, neg_ttlm);
+	if (local->ops->can_neg_ttlm)
+		res = local->ops->can_neg_ttlm(&local->hw, &sdata->vif,
+					       neg_ttlm);
+	trace_drv_neg_ttlm_res(local, sdata, res, neg_ttlm);
+
+	return res;
+}
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h
index 12a6f0e9ec..59e3ec4dc9 100644
--- a/net/mac80211/drop.h
+++ b/net/mac80211/drop.h
@@ -2,7 +2,7 @@
/*
* mac80211 drop reason list
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#ifndef MAC80211_DROP_H
@@ -66,6 +66,7 @@ typedef unsigned int __bitwise ieee80211_rx_result;
R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \
R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \
R(RX_DROP_U_NOT_PORT_CONTROL) \
+ R(RX_DROP_U_UNKNOWN_ACTION_REJECTED) \
/* this line for the trailing \ - add before this */
/* having two enums allows for checking ieee80211_rx_result use with sparse */
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 9f5ffdc9db..ecbb042dd0 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
if (!he_spr_ie_elem)
return;
+
+ he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control;
data = he_spr_ie_elem->optional;
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
- data++;
+ he_obss_pd->non_srg_max_offset = *data++;
+
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
- he_obss_pd->max_offset = *data++;
he_obss_pd->min_offset = *data++;
+ he_obss_pd->max_offset = *data++;
+ memcpy(he_obss_pd->bss_color_bitmap, data, 8);
+ data += 8;
+ memcpy(he_obss_pd->partial_bssid_bitmap, data, 8);
he_obss_pd->enable = true;
}
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 749f4ecab9..d7e8cf8e48 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2023 Intel Corporation
+ * Copyright(c) 2020-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -257,7 +257,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!link_conf))
width = NL80211_CHAN_WIDTH_20_NOHT;
else
- width = link_conf->chandef.width;
+ width = link_conf->chanreq.oper.width;
switch (width) {
default:
@@ -580,7 +580,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
/* we'll do more on status of this frame */
info = IEEE80211_SKB_CB(skb);
info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- /* we have 12 bits, and need 6: link_id 4, smps 2 */
+ /* we have 13 bits, and need 6: link_id 4, smps 2 */
info->status_data = IEEE80211_STATUS_TYPE_SMPS |
u16_encode_bits(status_link_id << 2 | smps,
IEEE80211_STATUS_SUBDATA_MASK);
@@ -603,6 +603,8 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id,
if (WARN_ON(!link))
goto out;
+ trace_api_request_smps(sdata->local, sdata, link, smps_mode);
+
if (link->u.mgd.driver_smps_mode == smps_mode)
goto out;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 8f2b445a5e..7ace5cdc6c 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -223,7 +223,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
struct cfg80211_bss *bss;
u64 bss_change;
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq = {};
struct ieee80211_channel *chan;
struct beacon_data *presp;
struct cfg80211_inform_bss bss_meta = {};
@@ -237,7 +237,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
drv_reset_tsf(local, sdata);
if (!ether_addr_equal(ifibss->bssid, bssid))
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
/* if merging, indicate to driver that we leave the old IBSS */
if (sdata->vif.cfg.ibss_joined) {
@@ -257,22 +257,22 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
kfree_rcu(presp, rcu_head);
/* make a copy of the chandef, it could be modified below. */
- chandef = *req_chandef;
- chan = chandef.chan;
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ chanreq.oper = *req_chandef;
+ chan = chanreq.oper.chan;
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper,
NL80211_IFTYPE_ADHOC)) {
- if (chandef.width == NL80211_CHAN_WIDTH_5 ||
- chandef.width == NL80211_CHAN_WIDTH_10 ||
- chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- chandef.width == NL80211_CHAN_WIDTH_20) {
+ if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_10 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_20) {
sdata_info(sdata,
"Failed to join IBSS, beacons forbidden\n");
return;
}
- chandef.width = NL80211_CHAN_WIDTH_20;
- chandef.center_freq1 = chan->center_freq;
+ chanreq.oper.width = NL80211_CHAN_WIDTH_20;
+ chanreq.oper.center_freq1 = chan->center_freq;
/* check again for downgraded chandef */
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper,
NL80211_IFTYPE_ADHOC)) {
sdata_info(sdata,
"Failed to join IBSS, beacons forbidden\n");
@@ -281,7 +281,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &chandef, NL80211_IFTYPE_ADHOC);
+ &chanreq.oper, NL80211_IFTYPE_ADHOC);
if (err < 0) {
sdata_info(sdata,
"Failed to join IBSS, invalid chandef\n");
@@ -295,7 +295,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
radar_required = err;
- if (ieee80211_link_use_channel(&sdata->deflink, &chandef,
+ if (ieee80211_link_use_channel(&sdata->deflink, &chanreq,
ifibss->fixed_channel ?
IEEE80211_CHANCTX_SHARED :
IEEE80211_CHANCTX_EXCLUSIVE)) {
@@ -307,7 +307,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
memcpy(ifibss->bssid, bssid, ETH_ALEN);
presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
- capability, tsf, &chandef,
+ capability, tsf, &chanreq.oper,
&have_higher_than_11mbit, NULL);
if (!presp)
return;
@@ -533,12 +533,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
IEEE80211_PRIVACY(ifibss->privacy));
/* XXX: should not really modify cfg80211 data */
if (cbss) {
- cbss->channel = sdata->deflink.csa_chandef.chan;
+ cbss->channel = sdata->deflink.csa_chanreq.oper.chan;
cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
}
}
- ifibss->chandef = sdata->deflink.csa_chandef;
+ ifibss->chandef = sdata->deflink.csa_chanreq.oper;
/* generate the beacon */
return ieee80211_ibss_csa_beacon(sdata, NULL, changed);
@@ -682,7 +682,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
spin_lock_bh(&ifibss->incomplete_lock);
while (!list_empty(&ifibss->incomplete_stations)) {
@@ -757,21 +757,22 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
enum nl80211_channel_type ch_type;
int err;
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn = {
+ .mode = IEEE80211_CONN_MODE_HT,
+ .bw_limit = IEEE80211_CONN_BW_LIMIT_40,
+ };
u32 vht_cap_info = 0;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- conn_flags = IEEE80211_CONN_DISABLE_VHT;
-
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20_NOHT:
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ conn.mode = IEEE80211_CONN_MODE_LEGACY;
fallthrough;
case NL80211_CHAN_WIDTH_20:
- conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
break;
default:
break;
@@ -783,8 +784,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
- vht_cap_info,
- conn_flags, ifibss->bssid, &csa_ie);
+ vht_cap_info, &conn,
+ ifibss->bssid, &csa_ie);
/* can't switch to destination channel, fail */
if (err < 0)
goto disconnect;
@@ -798,7 +799,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
goto disconnect;
params.count = csa_ie.count;
- params.chandef = csa_ie.chandef;
+ params.chandef = csa_ie.chanreq.oper;
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
@@ -857,7 +858,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
params.radar_required = err;
if (cfg80211_chandef_identical(&params.chandef,
- &sdata->vif.bss_conf.chandef)) {
+ &sdata->vif.bss_conf.chanreq.oper)) {
ibss_dbg(sdata,
"received csa with an identical chandef, ignoring\n");
return true;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a18361afea..3cedfdc909 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef IEEE80211_I_H
@@ -89,7 +89,8 @@ enum ieee80211_status_data {
IEEE80211_STATUS_TYPE_MASK = 0x00f,
IEEE80211_STATUS_TYPE_INVALID = 0,
IEEE80211_STATUS_TYPE_SMPS = 1,
- IEEE80211_STATUS_SUBDATA_MASK = 0xff0,
+ IEEE80211_STATUS_TYPE_NEG_TTLM = 2,
+ IEEE80211_STATUS_SUBDATA_MASK = 0x1ff0,
};
static inline bool
@@ -131,7 +132,7 @@ struct ieee80211_bss {
};
/**
- * enum ieee80211_corrupt_data_flags - BSS data corruption flags
+ * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags
* @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted
* @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted
*
@@ -144,7 +145,7 @@ enum ieee80211_bss_corrupt_data_flags {
};
/**
- * enum ieee80211_valid_data_flags - BSS valid data flags
+ * enum ieee80211_bss_valid_data_flags - BSS valid data flags
* @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE
@@ -370,19 +371,32 @@ enum ieee80211_sta_flags {
IEEE80211_STA_ENABLE_RRM = BIT(15),
};
-typedef u32 __bitwise ieee80211_conn_flags_t;
-
-enum ieee80211_conn_flags {
- IEEE80211_CONN_DISABLE_HT = (__force ieee80211_conn_flags_t)BIT(0),
- IEEE80211_CONN_DISABLE_40MHZ = (__force ieee80211_conn_flags_t)BIT(1),
- IEEE80211_CONN_DISABLE_VHT = (__force ieee80211_conn_flags_t)BIT(2),
- IEEE80211_CONN_DISABLE_80P80MHZ = (__force ieee80211_conn_flags_t)BIT(3),
- IEEE80211_CONN_DISABLE_160MHZ = (__force ieee80211_conn_flags_t)BIT(4),
- IEEE80211_CONN_DISABLE_HE = (__force ieee80211_conn_flags_t)BIT(5),
- IEEE80211_CONN_DISABLE_EHT = (__force ieee80211_conn_flags_t)BIT(6),
- IEEE80211_CONN_DISABLE_320MHZ = (__force ieee80211_conn_flags_t)BIT(7),
+enum ieee80211_conn_mode { /* enumerator order matters: code compares modes with '<' (e.g. mode < IEEE80211_CONN_MODE_EHT) */
+ IEEE80211_CONN_MODE_S1G,
+ IEEE80211_CONN_MODE_LEGACY,
+ IEEE80211_CONN_MODE_HT,
+ IEEE80211_CONN_MODE_VHT,
+ IEEE80211_CONN_MODE_HE,
+ IEEE80211_CONN_MODE_EHT,
};
+#define IEEE80211_CONN_MODE_HIGHEST IEEE80211_CONN_MODE_EHT /* alias for the last (largest) enumerator above; update when a mode is appended */
+
+enum ieee80211_conn_bw_limit { /* presumably the maximum channel width (in MHz) a connection may use - confirm against users */
+ IEEE80211_CONN_BW_LIMIT_20,
+ IEEE80211_CONN_BW_LIMIT_40,
+ IEEE80211_CONN_BW_LIMIT_80,
+ IEEE80211_CONN_BW_LIMIT_160, /* also 80+80 */
+ IEEE80211_CONN_BW_LIMIT_320,
+};
+
+struct ieee80211_conn_settings { /* pairs a connection mode with a bandwidth limit; replaces the removed ieee80211_conn_flags_t bit flags in this patch */
+ enum ieee80211_conn_mode mode; /* one of enum ieee80211_conn_mode */
+ enum ieee80211_conn_bw_limit bw_limit; /* one of enum ieee80211_conn_bw_limit */
+};
+
+extern const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited;
+
struct ieee80211_mgd_auth_data {
struct cfg80211_bss *bss;
unsigned long timeout;
@@ -416,7 +430,7 @@ struct ieee80211_mgd_assoc_data {
size_t elems_len;
u8 *elems; /* pointing to inside ie[] below */
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn;
u16 status;
@@ -441,6 +455,7 @@ struct ieee80211_mgd_assoc_data {
bool timeout_started;
bool comeback; /* whether the AP has requested association comeback */
bool s1g;
+ bool spp_amsdu;
unsigned int assoc_link_id;
@@ -509,6 +524,8 @@ struct ieee80211_if_managed {
unsigned int flags;
+ u16 mcast_seq_last;
+
bool status_acked;
bool status_received;
__le16 status_fc;
@@ -579,6 +596,11 @@ struct ieee80211_if_managed {
/* TID-to-link mapping support */
struct wiphy_delayed_work ttlm_work;
struct ieee80211_adv_ttlm_info ttlm_info;
+ struct wiphy_work teardown_ttlm_work;
+
+ /* dialog token enumerator for neg TTLM request */
+ u8 dialog_token_alloc;
+ struct wiphy_delayed_work neg_ttlm_timeout_work;
};
struct ieee80211_if_ibss {
@@ -664,7 +686,7 @@ struct mesh_csa_settings {
};
/**
- * struct mesh_table
+ * struct mesh_table - mesh hash table
*
* @known_gates: list of known mesh gates and their mpaths by the station. The
* gate's mpath may or may not be resolved and active.
@@ -866,6 +888,9 @@ struct ieee80211_chanctx {
enum ieee80211_chanctx_mode mode;
bool driver_present;
+ /* temporary data for search algorithm etc. */
+ struct ieee80211_chan_req req;
+
struct ieee80211_chanctx_conf conf;
};
@@ -938,7 +963,7 @@ struct ieee80211_link_data_managed {
enum ieee80211_smps_mode req_smps, /* requested smps mode */
driver_smps_mode; /* smps mode request */
- ieee80211_conn_flags_t conn_flags;
+ struct ieee80211_conn_settings conn;
s16 p2p_noa_index;
@@ -951,6 +976,7 @@ struct ieee80211_link_data_managed {
bool csa_waiting_bcn;
bool csa_ignored_same_chan;
+ bool csa_blocked_tx;
struct wiphy_delayed_work chswitch_work;
struct wiphy_work request_smps_work;
@@ -983,8 +1009,6 @@ struct ieee80211_link_data_managed {
int mu_edca_last_param_set;
u8 bss_param_ch_cnt;
-
- struct cfg80211_bss *bss;
};
struct ieee80211_link_data_ap {
@@ -1013,11 +1037,10 @@ struct ieee80211_link_data {
struct ieee80211_key __rcu *default_beacon_key;
struct wiphy_work csa_finalize_work;
- bool csa_block_tx;
bool operating_11g_mode;
- struct cfg80211_chan_def csa_chandef;
+ struct ieee80211_chan_req csa_chanreq;
struct wiphy_work color_change_finalize_work;
struct delayed_work color_collision_detect_work;
@@ -1025,7 +1048,7 @@ struct ieee80211_link_data {
/* context reservation -- protected with wiphy mutex */
struct ieee80211_chanctx *reserved_chanctx;
- struct cfg80211_chan_def reserved_chandef;
+ struct ieee80211_chan_req reserved;
bool reserved_radar_required;
bool reserved_ready;
@@ -1072,6 +1095,8 @@ struct ieee80211_sub_if_data {
unsigned long state;
+ bool csa_blocked_queues;
+
char name[IFNAMSIZ];
struct ieee80211_fragment_cache frags;
@@ -1137,6 +1162,8 @@ struct ieee80211_sub_if_data {
struct wiphy_work activate_links_work;
u16 desired_active_links;
+ u16 restart_active_links;
+
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
struct dentry *subdir_stations;
@@ -1160,6 +1187,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
#define sdata_dereference(p, sdata) \
wiphy_dereference(sdata->local->hw.wiphy, p)
+/*
+ * for_each_sdata_link - iterate over the links of all running interfaces
+ * @_local: the struct ieee80211_local whose ->interfaces list is walked
+ * @_link: lvalue that is assigned each non-NULL link pointer in turn
+ *
+ * Only interfaces for which ieee80211_sdata_running() is true are visited,
+ * and NULL entries of sdata->link[] are skipped. Link pointers are read
+ * with wiphy_dereference() against @_local's wiphy.
+ *
+ * Every reference to the local must go through the @_local parameter, so
+ * the macro does not depend on the caller having a variable named "local".
+ */
+#define for_each_sdata_link(_local, _link) \
+	/* outer loop just to define the variables ... */ \
+	for (struct ieee80211_sub_if_data *___sdata = NULL; \
+	     !___sdata; \
+	     ___sdata = (void *)~0 /* always stop */) \
+	list_for_each_entry(___sdata, &(_local)->interfaces, list) \
+	if (ieee80211_sdata_running(___sdata)) \
+	for (int ___link_id = 0; \
+	     ___link_id < ARRAY_SIZE(___sdata->link); \
+	     ___link_id++) \
+	if ((_link = wiphy_dereference((_local)->hw.wiphy, \
+				       ___sdata->link[___link_id])))
+
static inline int
ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems,
struct cfg80211_rnr_elems *rnr_elems,
@@ -1330,7 +1370,8 @@ struct ieee80211_local {
bool wiphy_ciphers_allocated;
- bool use_chanctx;
+ struct cfg80211_chan_def dflt_chandef;
+ bool emulate_chanctx;
/* protects the aggregated multicast list and filter calls */
spinlock_t filter_lock;
@@ -1456,8 +1497,6 @@ struct ieee80211_local {
enum mac80211_scan_state next_scan_state;
struct wiphy_delayed_work scan_work;
struct ieee80211_sub_if_data __rcu *scan_sdata;
- /* For backward compatibility only -- do not use */
- struct cfg80211_chan_def _oper_chandef;
/* Temporary remain-on-channel for off-channel operations */
struct ieee80211_channel *tmp_channel;
@@ -1531,8 +1570,6 @@ struct ieee80211_local {
int user_power_level; /* in dBm, for all interfaces */
- enum ieee80211_smps_mode smps_mode;
-
struct work_struct restart_work;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1559,7 +1596,7 @@ struct ieee80211_local {
/* virtual monitor interface */
struct ieee80211_sub_if_data __rcu *monitor_sdata;
- struct cfg80211_chan_def monitor_chandef;
+ struct ieee80211_chan_req monitor_chanreq;
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
@@ -1624,7 +1661,7 @@ ieee80211_get_link_sband(struct ieee80211_link_data *link)
/* this struct holds the value parsing from channel switch IE */
struct ieee80211_csa_ie {
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq;
u8 mode;
u8 count;
u8 ttl;
@@ -1633,6 +1670,14 @@ struct ieee80211_csa_ie {
u32 max_switch_time;
};
+enum ieee80211_elems_parse_error { /* one BIT() per error, so several can be combined; presumably OR-ed into ieee802_11_elems.parse_error (u8) - confirm */
+ IEEE80211_PARSE_ERR_INVALID_END = BIT(0),
+ IEEE80211_PARSE_ERR_DUP_ELEM = BIT(1),
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE = BIT(2),
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM = BIT(3),
+ IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC = BIT(4),
+};
+
/* Parsed Information Elements */
struct ieee802_11_elems {
const u8 *ie_start;
@@ -1727,12 +1772,6 @@ struct ieee802_11_elems {
size_t ml_basic_len;
size_t ml_reconf_len;
- /* The basic Multi-Link element in the original IEs */
- const struct element *ml_basic_elem;
-
- /* The reconfiguration Multi-Link element in the original IEs */
- const struct element *ml_reconf_elem;
-
u8 ttlm_num;
/*
@@ -1743,16 +1782,8 @@ struct ieee802_11_elems {
struct ieee80211_mle_per_sta_profile *prof;
size_t sta_prof_len;
- /* whether a parse error occurred while retrieving these elements */
- bool parse_error;
-
- /*
- * scratch buffer that can be used for various element parsing related
- * tasks, e.g., element de-fragmentation etc.
- */
- size_t scratch_len;
- u8 *scratch_pos;
- u8 scratch[] __counted_by(scratch_len);
+ /* whether/which parse error occurred while retrieving these elements */
+ u8 parse_error;
};
static inline struct ieee80211_local *hw_to_local(
@@ -1801,6 +1832,8 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
unsigned int mpdu_len,
unsigned int mpdu_offset);
int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
+int ieee80211_hw_conf_chan(struct ieee80211_local *local);
+void ieee80211_hw_conf_init(struct ieee80211_local *local);
void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed);
@@ -1812,6 +1845,8 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
void ieee80211_configure_filter(struct ieee80211_local *local);
u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
+void ieee80211_handle_queued_frames(struct ieee80211_local *local);
+
u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
u64 *cookie, gfp_t gfp);
@@ -2166,9 +2201,8 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* @elems: parsed 802.11 elements received with the frame
* @current_band: indicates the current band
* @vht_cap_info: VHT capabilities of the transmitter
- * @conn_flags: contains information about own capabilities and restrictions
- * to decide which channel switch announcements can be accepted, using
- * flags from &enum ieee80211_conn_flags.
+ * @conn: contains information about own capabilities and restrictions
+ * to decide which channel switch announcements can be accepted
* @bssid: the currently connected bssid (for reporting)
* @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl.
* All of them will be filled with if success only.
@@ -2178,7 +2212,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- ieee80211_conn_flags_t conn_flags, u8 *bssid,
+ struct ieee80211_conn_settings *conn,
+ u8 *bssid,
struct ieee80211_csa_ie *csa_ie);
/* Suspend/resume and hw reconfiguration */
@@ -2202,6 +2237,9 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
/* utility functions/constants */
extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
+const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode);
+enum ieee80211_conn_bw_limit
+ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef);
int ieee80211_frame_duration(enum nl80211_band band, size_t len,
int rate, int erp, int short_preamble);
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
@@ -2243,6 +2281,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
/**
* struct ieee80211_elems_parse_params - element parsing parameters
+ * @mode: connection mode for parsing
* @start: pointer to the elements
* @len: length of the elements
* @action: %true if the elements came from an action frame
@@ -2260,6 +2299,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
* for EHT capabilities parsing)
*/
struct ieee80211_elems_parse_params {
+ enum ieee80211_conn_mode mode;
const u8 *start;
size_t len;
bool action;
@@ -2279,6 +2319,7 @@ ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
struct cfg80211_bss *bss)
{
struct ieee80211_elems_parse_params params = {
+ .mode = IEEE80211_CONN_MODE_HIGHEST,
.start = start,
.len = len,
.action = action,
@@ -2408,7 +2449,6 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
const u8 *da, const u8 *bssid,
u16 stype, u16 reason,
bool send_frame, u8 *frame_buf);
-u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end);
enum {
IEEE80211_PROBE_FLAG_DIRECTED = BIT(0),
@@ -2453,32 +2493,36 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
-u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- u8 *end);
-void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
- enum ieee80211_smps_mode smps_mode,
- struct sk_buff *skb);
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata);
u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap);
int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates);
-int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band);
-int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band);
u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_s1g_cap *caps,
struct sk_buff *skb);
void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
-u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap);
+
+/* element building in SKBs */
+int ieee80211_put_srates_elem(struct sk_buff *skb,
+ const struct ieee80211_supported_band *sband,
+ u32 basic_rates, u32 rate_flags, u32 masked_rates,
+ u8 element_id);
+int ieee80211_put_he_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn);
+int ieee80211_put_he_6ghz_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode);
+int ieee80211_put_eht_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn);
/* channel management */
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
@@ -2488,23 +2532,46 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
- bool support_160, bool support_320,
struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local,
const struct ieee80211_he_operation *he_oper,
const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef);
bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
struct cfg80211_chan_def *chandef);
-ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
+void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef,
+ struct ieee80211_conn_settings *conn);
+static inline void
+ieee80211_chanreq_downgrade(struct ieee80211_chan_req *chanreq,
+ struct ieee80211_conn_settings *conn)
+{
+ ieee80211_chandef_downgrade(&chanreq->oper, conn);
+ if (WARN_ON(!conn))
+ return;
+ if (conn->mode < IEEE80211_CONN_MODE_EHT)
+ chanreq->ap.chan = NULL;
+}
+
+bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a,
+ const struct ieee80211_chan_req *b);
int __must_check
+_ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *req,
+ enum ieee80211_chanctx_mode mode,
+ bool assign_on_failure);
+
+static inline int __must_check
ieee80211_link_use_channel(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode);
+ const struct ieee80211_chan_req *req,
+ enum ieee80211_chanctx_mode mode)
+{
+ return _ieee80211_link_use_channel(link, req, mode, false);
+}
+
int __must_check
ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
+ const struct ieee80211_chan_req *req,
enum ieee80211_chanctx_mode mode,
bool radar_required);
int __must_check
@@ -2512,9 +2579,11 @@ ieee80211_link_use_reserved_context(struct ieee80211_link_data *link);
int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
int __must_check
-ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
- const struct cfg80211_chan_def *chandef,
- u64 *changed);
+ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
+ const struct ieee80211_chan_req *req,
+ u64 *changed);
+void __ieee80211_link_release_channel(struct ieee80211_link_data *link,
+ bool skip_idle_recalc);
void ieee80211_link_release_channel(struct ieee80211_link_data *link);
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link);
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
@@ -2561,7 +2630,7 @@ int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct net_device *dev,
const u8 *addr);
-void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
+void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link);
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason);
void
@@ -2589,12 +2658,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
-u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_eht_cap(u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- const struct ieee80211_sta_eht_cap *eht_cap,
- u8 *end,
- bool for_ap);
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata);
void
ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
@@ -2603,6 +2667,12 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len);
+void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len);
+int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ttlm_params *params);
void ieee80211_check_wbrf_support(struct ieee80211_local *local);
void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 11c4caa474..b935bb5d8e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -511,7 +511,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
* would have removed them, but in other modes there shouldn't
* be any stations.
*/
- flushed = sta_info_flush(sdata);
+ flushed = sta_info_flush(sdata, -1);
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0);
/* don't count this interface for allmulti while it is down */
@@ -544,10 +544,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
sdata->vif.bss_conf.csa_active = false;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->deflink.csa_block_tx) {
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
@@ -557,7 +557,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chandef;
+ chandef = sdata->vif.bss_conf.chanreq.oper;
WARN_ON(local->suspended);
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
@@ -686,6 +686,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_del_virtual_monitor(local);
ieee80211_recalc_idle(local);
+ ieee80211_recalc_offload(local);
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
break;
@@ -1121,9 +1122,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
int ret;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return 0;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1145,11 +1143,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
ieee80211_set_default_queues(sdata);
- ret = drv_add_interface(local, sdata);
- if (WARN_ON(ret)) {
- /* ok .. stupid driver, it asked for this! */
- kfree(sdata);
- return ret;
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
+ ret = drv_add_interface(local, sdata);
+ if (WARN_ON(ret)) {
+ /* ok .. stupid driver, it asked for this! */
+ kfree(sdata);
+ return ret;
+ }
}
set_bit(SDATA_STATE_RUNNING, &sdata->state);
@@ -1164,7 +1164,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
rcu_assign_pointer(local->monitor_sdata, sdata);
mutex_unlock(&local->iflist_mtx);
- ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef,
+ ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chanreq,
IEEE80211_CHANCTX_EXCLUSIVE);
if (ret) {
mutex_lock(&local->iflist_mtx);
@@ -1187,9 +1187,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
- return;
-
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1209,7 +1206,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
ieee80211_link_release_channel(&sdata->deflink);
- drv_remove_interface(local, sdata);
+ if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ drv_remove_interface(local, sdata);
kfree(sdata);
}
@@ -1252,7 +1250,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
sdata->vif.cab_queue = master->vif.cab_queue;
memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
sizeof(sdata->vif.hw_queue));
- sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef;
+ sdata->vif.bss_conf.chanreq = master->vif.bss_conf.chanreq;
sdata->crypto_tx_tailroom_needed_cnt +=
master->crypto_tx_tailroom_needed_cnt;
@@ -1288,8 +1286,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
res = drv_start(local);
if (res)
goto err_del_bss;
- /* we're brought up, everything changes */
- hw_reconf_flags = ~0;
ieee80211_led_radio(local, true);
ieee80211_mod_tpt_led_trig(local,
IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
@@ -1436,7 +1432,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (coming_up)
local->open_count++;
- if (hw_reconf_flags)
+ if (local->open_count == 1)
+ ieee80211_hw_conf_init(local);
+ else if (hw_reconf_flags)
ieee80211_hw_config(local, hw_reconf_flags);
ieee80211_recalc_ps(local);
@@ -1546,6 +1544,22 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
default:
break;
}
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) {
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ switch (mgmt->u.action.u.ttlm_req.action_code) {
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
+ ieee80211_process_neg_ttlm_req(sdata, mgmt,
+ skb->len);
+ break;
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_RES:
+ ieee80211_process_neg_ttlm_res(sdata, mgmt,
+ skb->len);
+ break;
+ default:
+ break;
+ }
+ }
} else if (ieee80211_is_ext(mgmt->frame_control)) {
if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_rx_queued_ext(sdata, skb);
@@ -1683,8 +1697,13 @@ static void ieee80211_activate_links_work(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
activate_links_work);
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+
+ if (local->in_reconfig)
+ return;
ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links);
+ sdata->desired_active_links = 0;
}
/*
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index af74d7f9d9..eecdd2265e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2020, 2022-2023 Intel Corporation
+ * Copyright 2018-2020, 2022-2024 Intel Corporation
*/
#include <crypto/utils.h>
@@ -925,6 +925,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
*/
key->color = atomic_inc_return(&key_color);
+ /* keep this flag for easier access later */
+ if (sta && sta->sta.spp_amsdu)
+ key->conf.flags |= IEEE80211_KEY_FLAG_SPP_AMSDU;
+
increment_tailroom_need_count(sdata);
ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key);
@@ -1368,12 +1372,19 @@ EXPORT_SYMBOL_GPL(ieee80211_remove_key);
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf)
+ struct ieee80211_key_conf *keyconf,
+ int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *key;
int err;
+ struct ieee80211_link_data *link_data =
+ link_id < 0 ? &sdata->deflink :
+ sdata_dereference(sdata->link[link_id], sdata);
+
+ if (WARN_ON(!link_data))
+ return ERR_PTR(-EINVAL);
if (WARN_ON(!local->wowlan))
return ERR_PTR(-EINVAL);
@@ -1390,8 +1401,9 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
- /* FIXME: this function needs to get a link ID */
- err = ieee80211_key_link(key, &sdata->deflink, NULL);
+ key->conf.link_id = link_id;
+
+ err = ieee80211_key_link(key, link_data, NULL);
if (err)
return ERR_PTR(err);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index d4f86955af..af0321408a 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022-2023 Intel Corporation
+ * Copyright (C) 2022-2024 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -73,6 +73,8 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
ieee80211_mgd_stop_link(link);
cancel_delayed_work_sync(&link->color_collision_detect_work);
+ wiphy_work_cancel(link->sdata->local->hw.wiphy,
+ &link->csa_finalize_work);
ieee80211_link_release_channel(link);
}
@@ -354,9 +356,9 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- /* FIXME: kill TDLS connections on the link */
+ ieee80211_teardown_tdls_peers(link);
- ieee80211_link_release_channel(link);
+ __ieee80211_link_release_channel(link, true);
}
list_for_each_entry(sta, &local->sta_list, list) {
@@ -402,8 +404,24 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- ret = ieee80211_link_use_channel(link, &link->conf->chandef,
- IEEE80211_CHANCTX_SHARED);
+ /*
+ * This call really should not fail. Unfortunately, it appears
+ * that this may happen occasionally with some drivers. Should
+ * it happen, we are stuck in a bad place as going backwards is
+ * not really feasible.
+ *
+ * So let's just tell link_use_channel that it must not fail to
+ * assign the channel context (from mac80211's perspective) and
+ * assume the driver is going to trigger a recovery flow if it
+ * had a failure.
+ * That really is not great nor guaranteed to work. But at least
+ * the internal mac80211 state remains consistent and there is
+ * a chance that we can recover.
+ */
+ ret = _ieee80211_link_use_channel(link,
+ &link->conf->chanreq,
+ IEEE80211_CHANCTX_SHARED,
+ true);
WARN_ON_ONCE(ret);
ieee80211_mgd_set_link_qos_params(link);
@@ -444,10 +462,16 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
lockdep_assert_wiphy(local->hw.wiphy);
- if (!drv_can_activate_links(local, sdata, active_links))
+ if (WARN_ON(!active_links))
return -EINVAL;
old_active = sdata->vif.active_links;
+ if (old_active == active_links)
+ return 0;
+
+ if (!drv_can_activate_links(local, sdata, active_links))
+ return -EINVAL;
+
if (old_active & active_links) {
/*
* if there's at least one link that stays active across
@@ -472,6 +496,9 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ if (WARN_ON(!active_links))
+ return;
+
if (!ieee80211_sdata_running(sdata))
return;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f2ece77935..7ba329ebdd 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -93,16 +93,32 @@ static void ieee80211_reconfig_filter(struct wiphy *wiphy,
ieee80211_configure_filter(local);
}
-static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
+static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
+ struct ieee80211_chanctx_conf *ctx)
{
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef = {};
+ struct cfg80211_chan_def *oper = NULL;
+ enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_STATIC;
u32 changed = 0;
int power;
u32 offchannel_flag;
+ if (!local->emulate_chanctx)
+ return 0;
+
offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
+ if (ctx && !WARN_ON(!ctx->def.chan)) {
+ oper = &ctx->def;
+ if (ctx->rx_chains_static > 1)
+ smps_mode = IEEE80211_SMPS_OFF;
+ else if (ctx->rx_chains_dynamic > 1)
+ smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ smps_mode = IEEE80211_SMPS_STATIC;
+ }
+
if (local->scan_chandef.chan) {
chandef = local->scan_chandef;
} else if (local->tmp_channel) {
@@ -110,25 +126,30 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
chandef.center_freq1 = chandef.chan->center_freq;
chandef.freq1_offset = chandef.chan->freq_offset;
- } else
- chandef = local->_oper_chandef;
+ } else if (oper) {
+ chandef = *oper;
+ } else {
+ chandef = local->dflt_chandef;
+ }
- WARN(!cfg80211_chandef_valid(&chandef),
- "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
- chandef.chan->center_freq, chandef.chan->freq_offset,
- chandef.width, chandef.center_freq1, chandef.freq1_offset,
- chandef.center_freq2);
+ if (WARN(!cfg80211_chandef_valid(&chandef),
+ "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
+ chandef.chan ? chandef.chan->center_freq : -1,
+ chandef.chan ? chandef.chan->freq_offset : 0,
+ chandef.width, chandef.center_freq1, chandef.freq1_offset,
+ chandef.center_freq2))
+ return 0;
- if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
+ if (!oper || !cfg80211_chandef_identical(&chandef, oper))
local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
else
local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
- if (offchannel_flag ||
- !cfg80211_chandef_identical(&local->hw.conf.chandef,
- &local->_oper_chandef)) {
+ /* force it also for scanning, since drivers might config differently */
+ if (offchannel_flag || local->scanning || local->in_reconfig ||
+ !cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) {
local->hw.conf.chandef = chandef;
changed |= IEEE80211_CONF_CHANGE_CHANNEL;
}
@@ -140,8 +161,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
* that otherwise STATIC is used.
*/
local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC;
- } else if (local->hw.conf.smps_mode != local->smps_mode) {
- local->hw.conf.smps_mode = local->smps_mode;
+ } else if (local->hw.conf.smps_mode != smps_mode) {
+ local->hw.conf.smps_mode = smps_mode;
changed |= IEEE80211_CONF_CHANGE_SMPS;
}
@@ -173,12 +194,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
might_sleep();
- if (!local->use_chanctx)
- changed |= ieee80211_hw_conf_chan(local);
- else
- changed &= ~(IEEE80211_CONF_CHANGE_CHANNEL |
- IEEE80211_CONF_CHANGE_POWER |
- IEEE80211_CONF_CHANGE_SMPS);
+ WARN_ON(changed & (IEEE80211_CONF_CHANGE_CHANNEL |
+ IEEE80211_CONF_CHANGE_POWER |
+ IEEE80211_CONF_CHANGE_SMPS));
if (changed && local->open_count) {
ret = drv_config(local, changed);
@@ -202,13 +220,115 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
return ret;
}
+/* for scanning, offchannel and chanctx emulation only */
+static int _ieee80211_hw_conf_chan(struct ieee80211_local *local,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ u32 changed;
+
+ if (!local->open_count)
+ return 0;
+
+ changed = ieee80211_calc_hw_conf_chan(local, ctx);
+ if (!changed)
+ return 0;
+
+ return drv_config(local, changed);
+}
+
+int ieee80211_hw_conf_chan(struct ieee80211_local *local)
+{
+ struct ieee80211_chanctx *ctx;
+
+ ctx = list_first_entry_or_null(&local->chanctx_list,
+ struct ieee80211_chanctx,
+ list);
+
+ return _ieee80211_hw_conf_chan(local, ctx ? &ctx->conf : NULL);
+}
+
+void ieee80211_hw_conf_init(struct ieee80211_local *local)
+{
+ u32 changed = ~(IEEE80211_CONF_CHANGE_CHANNEL |
+ IEEE80211_CONF_CHANGE_POWER |
+ IEEE80211_CONF_CHANGE_SMPS);
+
+ if (WARN_ON(!local->open_count))
+ return;
+
+ if (local->emulate_chanctx) {
+ struct ieee80211_chanctx *ctx;
+
+ ctx = list_first_entry_or_null(&local->chanctx_list,
+ struct ieee80211_chanctx,
+ list);
+
+ changed |= ieee80211_calc_hw_conf_chan(local,
+ ctx ? &ctx->conf : NULL);
+ }
+
+ WARN_ON(drv_config(local, changed));
+}
+
+int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = ctx->radar_enabled;
+
+ return _ieee80211_hw_conf_chan(local, ctx);
+}
+EXPORT_SYMBOL(ieee80211_emulate_add_chanctx);
+
+void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = false;
+
+ _ieee80211_hw_conf_chan(local, NULL);
+}
+EXPORT_SYMBOL(ieee80211_emulate_remove_chanctx);
+
+void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx,
+ u32 changed)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ local->hw.conf.radar_enabled = ctx->radar_enabled;
+
+ _ieee80211_hw_conf_chan(local, ctx);
+}
+EXPORT_SYMBOL(ieee80211_emulate_change_chanctx);
+
+int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ if (n_vifs <= 0)
+ return -EINVAL;
+
+ local->hw.conf.radar_enabled = vifs[0].new_ctx->radar_enabled;
+ _ieee80211_hw_conf_chan(local, vifs[0].new_ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL(ieee80211_emulate_switch_vif_chanctx);
+
#define BSS_CHANGED_VIF_CFG_FLAGS (BSS_CHANGED_ASSOC |\
BSS_CHANGED_IDLE |\
BSS_CHANGED_PS |\
BSS_CHANGED_IBSS |\
BSS_CHANGED_ARP_FILTER |\
BSS_CHANGED_SSID |\
- BSS_CHANGED_MLD_VALID_LINKS)
+ BSS_CHANGED_MLD_VALID_LINKS |\
+ BSS_CHANGED_MLD_TTLM)
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed)
@@ -303,9 +423,9 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_ERP_SLOT;
}
-static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+/* context: requires softirqs disabled */
+void ieee80211_handle_queued_frames(struct ieee80211_local *local)
{
- struct ieee80211_local *local = from_tasklet(local, t, tasklet);
struct sk_buff *skb;
while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -330,6 +450,13 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t)
}
}
+static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+{
+ struct ieee80211_local *local = from_tasklet(local, t, tasklet);
+
+ ieee80211_handle_queued_frames(local);
+}
+
static void ieee80211_restart_work(struct work_struct *work)
{
struct ieee80211_local *local =
@@ -644,7 +771,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
struct ieee80211_local *local;
int priv_size, i;
struct wiphy *wiphy;
- bool use_chanctx;
+ bool emulate_chanctx;
if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config ||
!ops->add_interface || !ops->remove_interface ||
@@ -659,12 +786,26 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
return NULL;
/* check all or no channel context operations exist */
- i = !!ops->add_chanctx + !!ops->remove_chanctx +
- !!ops->change_chanctx + !!ops->assign_vif_chanctx +
- !!ops->unassign_vif_chanctx;
- if (WARN_ON(i != 0 && i != 5))
- return NULL;
- use_chanctx = i == 5;
+ if (ops->add_chanctx == ieee80211_emulate_add_chanctx &&
+ ops->remove_chanctx == ieee80211_emulate_remove_chanctx &&
+ ops->change_chanctx == ieee80211_emulate_change_chanctx) {
+ if (WARN_ON(ops->assign_vif_chanctx ||
+ ops->unassign_vif_chanctx))
+ return NULL;
+ emulate_chanctx = true;
+ } else {
+ if (WARN_ON(ops->add_chanctx == ieee80211_emulate_add_chanctx ||
+ ops->remove_chanctx == ieee80211_emulate_remove_chanctx ||
+ ops->change_chanctx == ieee80211_emulate_change_chanctx))
+ return NULL;
+ if (WARN_ON(!ops->add_chanctx ||
+ !ops->remove_chanctx ||
+ !ops->change_chanctx ||
+ !ops->assign_vif_chanctx ||
+ !ops->unassign_vif_chanctx))
+ return NULL;
+ emulate_chanctx = false;
+ }
/* Ensure 32-byte alignment of our private data and hw private data.
* We use the wiphy priv data for both our ieee80211_local and for
@@ -698,7 +839,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
WIPHY_FLAG_REPORTS_OBSS |
WIPHY_FLAG_OFFCHAN_TX;
- if (!use_chanctx || ops->remain_on_channel)
+ if (emulate_chanctx || ops->remain_on_channel)
wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
@@ -734,8 +875,11 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT);
}
- if (!ops->set_key)
+ if (!ops->set_key) {
wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT);
+ }
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS);
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM);
@@ -752,7 +896,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN);
local->ops = ops;
- local->use_chanctx = use_chanctx;
+ local->emulate_chanctx = emulate_chanctx;
+
+ if (emulate_chanctx)
+ ieee80211_hw_set(&local->hw, CHANCTX_STA_CSA);
/*
* We need a bit of data queued to build aggregates properly, so
@@ -829,7 +976,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ieee80211_dfs_radar_detected_work);
wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter);
- local->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&local->dynamic_ps_enable_work,
ieee80211_dynamic_ps_enable_work);
@@ -980,7 +1126,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
* as much, e.g. monitoring beacons would be hard if we
* might not even know which link is active at which time.
*/
- if (WARN_ON(!local->use_chanctx))
+ if (WARN_ON(local->emulate_chanctx))
return -EINVAL;
if (WARN_ON(!local->ops->link_info_changed))
@@ -1024,7 +1170,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
return -EINVAL;
#endif
- if (!local->use_chanctx) {
+ if (local->emulate_chanctx) {
for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) {
const struct ieee80211_iface_combination *comb;
@@ -1090,11 +1236,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
&sband->channels[i],
NL80211_CHAN_NO_HT);
/* init channel we're on */
- if (!local->use_chanctx && !local->_oper_chandef.chan) {
+ local->monitor_chanreq.oper = dflt_chandef;
+ if (local->emulate_chanctx) {
+ local->dflt_chandef = dflt_chandef;
local->hw.conf.chandef = dflt_chandef;
- local->_oper_chandef = dflt_chandef;
}
- local->monitor_chandef = dflt_chandef;
}
channels += sband->n_channels;
@@ -1115,8 +1261,26 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_vht = supp_vht || sband->vht_cap.vht_supported;
for_each_sband_iftype_data(sband, i, iftd) {
+ u8 he_40_mhz_cap;
+
supp_he = supp_he || iftd->he_cap.has_he;
supp_eht = supp_eht || iftd->eht_cap.has_eht;
+
+ if (band == NL80211_BAND_2GHZ)
+ he_40_mhz_cap =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
+ else
+ he_40_mhz_cap =
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
+
+ /* currently no support for HE client where HT has 40 MHz but not HE */
+ if (iftd->he_cap.has_he &&
+ iftd->types_mask & (BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT)) &&
+ sband->ht_cap.ht_supported &&
+ sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
+ !(iftd->he_cap.he_cap_elem.phy_cap_info[0] & he_40_mhz_cap))
+ return -EINVAL;
}
/* HT, VHT, HE require QoS, thus >= 4 queues */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 3d4806b7ff..6d4510221c 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -97,7 +97,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.bss_conf.basic_rates != basic_rates)
return false;
- cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
+ cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chanreq.oper.chan,
NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
@@ -107,10 +107,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
ie->vht_operation, ie->ht_operation,
&sta_chan_def);
- ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, ie->eht_operation,
+ ieee80211_chandef_he_6ghz_oper(sdata->local, ie->he_operation,
+ ie->eht_operation,
&sta_chan_def);
- if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
+ if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chanreq.oper,
&sta_chan_def))
return false;
@@ -435,9 +436,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!sband->ht_cap.ht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
@@ -476,16 +477,16 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!ht_cap->ht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation))
return -ENOMEM;
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
- ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef,
+ ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chanreq.oper,
sdata->vif.bss_conf.ht_operation_mode,
false);
@@ -507,9 +508,9 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!sband->vht_cap.vht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
@@ -548,9 +549,9 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
if (!vht_cap->vht_supported ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation))
@@ -558,7 +559,7 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
ieee80211_ie_build_vht_oper(pos, vht_cap,
- &sdata->vif.bss_conf.chandef);
+ &sdata->vif.bss_conf.chanreq.oper);
return 0;
}
@@ -566,29 +567,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ie_len)
{
- const struct ieee80211_sta_he_cap *he_cap;
struct ieee80211_supported_band *sband;
- u8 *pos;
sband = ieee80211_get_sband(sdata);
if (!sband)
return -EINVAL;
- he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
-
- if (!he_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
- if (skb_tailroom(skb) < ie_len)
- return -ENOMEM;
-
- pos = skb_put(skb, ie_len);
- ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len);
-
- return 0;
+ return ieee80211_put_he_cap(skb, sdata, sband, NULL);
}
int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
@@ -605,20 +595,20 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
if (!he_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
len = 2 + 1 + sizeof(struct ieee80211_he_operation);
- if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ)
+ if (sdata->vif.bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ)
len += sizeof(struct ieee80211_he_6ghz_oper);
if (skb_tailroom(skb) < len)
return -ENOMEM;
pos = skb_put(skb, len);
- ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef);
+ ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chanreq.oper);
return 0;
}
@@ -639,37 +629,25 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
if (!iftd)
return 0;
- ieee80211_ie_build_he_6ghz_cap(sdata, sdata->deflink.smps_mode, skb);
+ ieee80211_put_he_6ghz_cap(skb, sdata, sdata->deflink.smps_mode);
return 0;
}
int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ie_len)
{
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
struct ieee80211_supported_band *sband;
- u8 *pos;
sband = ieee80211_get_sband(sdata);
if (!sband)
return -EINVAL;
- he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
- eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
- if (!he_cap || !eht_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
- if (skb_tailroom(skb) < ie_len)
- return -ENOMEM;
-
- pos = skb_put(skb, ie_len);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + ie_len, false);
-
- return 0;
+ return ieee80211_put_eht_cap(skb, sdata, sband, NULL);
}
int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
@@ -685,9 +663,9 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk
eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
if (!eht_cap ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return 0;
len = 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) +
@@ -697,7 +675,7 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk
return -ENOMEM;
pos = skb_put(skb, len);
- ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chandef, eht_cap);
+ ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chanreq.oper, eht_cap);
return 0;
}
@@ -745,9 +723,9 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
return;
if (!ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT) ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10)
return;
sdata->vif.bss_conf.he_support = true;
@@ -972,24 +950,22 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
int head_len, tail_len;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
- struct ieee80211_chanctx_conf *chanctx_conf;
struct mesh_csa_settings *csa;
- enum nl80211_band band;
+ const struct ieee80211_supported_band *sband;
u8 ie_len_he_cap, ie_len_eht_cap;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
+ u32 rate_flags;
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
- band = chanctx_conf->def.chan->band;
- rcu_read_unlock();
- ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
- ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
+ sband = ieee80211_get_sband(sdata);
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
+
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata);
+ ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata);
head_len = hdr_len +
2 + /* NULL SSID */
/* Channel Switch Announcement */
@@ -1113,7 +1089,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
}
rcu_read_unlock();
- if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
+ if (ieee80211_put_srates_elem(skb, sband,
+ sdata->vif.bss_conf.basic_rates,
+ rate_flags, 0, WLAN_EID_SUPP_RATES) ||
mesh_add_ds_params_ie(sdata, skb))
goto out_free;
@@ -1124,7 +1102,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
skb_trim(skb, 0);
bcn->tail = bcn->head + bcn->head_len;
- if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
+ if (ieee80211_put_srates_elem(skb, sband,
+ sdata->vif.bss_conf.basic_rates,
+ rate_flags, 0, WLAN_EID_EXT_SUPP_RATES) ||
mesh_add_rsn_ie(sdata, skb) ||
mesh_add_ht_cap_ie(sdata, skb) ||
mesh_add_ht_oper_ie(sdata, skb) ||
@@ -1240,7 +1220,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
netif_carrier_off(sdata->dev);
/* flush STAs and mpaths on this iface */
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
ieee80211_free_keys(sdata, true);
mesh_path_flush_by_iface(sdata);
@@ -1282,11 +1262,12 @@ static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
* unavailable.
*/
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &sdata->vif.bss_conf.chandef,
+ &sdata->vif.bss_conf.chanreq.oper,
NL80211_IFTYPE_MESH_POINT);
if (err > 0)
cfg80211_radar_event(sdata->local->hw.wiphy,
- &sdata->vif.bss_conf.chandef, GFP_ATOMIC);
+ &sdata->vif.bss_conf.chanreq.oper,
+ GFP_ATOMIC);
}
static bool
@@ -1298,7 +1279,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_supported_band *sband;
int err;
- ieee80211_conn_flags_t conn_flags = 0;
+ struct ieee80211_conn_settings conn = ieee80211_conn_settings_unlimited;
u32 vht_cap_info = 0;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -1307,15 +1288,18 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (!sband)
return false;
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- fallthrough;
+ conn.mode = IEEE80211_CONN_MODE_LEGACY;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_20:
- conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
- fallthrough;
+ conn.mode = IEEE80211_CONN_MODE_HT;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_40:
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ conn.mode = IEEE80211_CONN_MODE_HT;
+ conn.bw_limit = IEEE80211_CONN_BW_LIMIT_40;
break;
default:
break;
@@ -1327,8 +1311,8 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
- vht_cap_info,
- conn_flags, sdata->vif.addr,
+ vht_cap_info, &conn,
+ sdata->vif.addr,
&csa_ie);
if (err < 0)
return false;
@@ -1341,7 +1325,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY)
ieee80211_mesh_csa_mark_radar(sdata);
- params.chandef = csa_ie.chandef;
+ params.chandef = csa_ie.chanreq.oper;
params.count = csa_ie.count;
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
@@ -1377,7 +1361,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
params.radar_required = err;
if (cfg80211_chandef_identical(&params.chandef,
- &sdata->vif.bss_conf.chandef)) {
+ &sdata->vif.bss_conf.chanreq.oper)) {
mcsa_dbg(sdata,
"received csa with an identical chandef, ignoring\n");
return true;
@@ -1557,7 +1541,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
*changed |= BSS_CHANGED_BEACON;
mcsa_dbg(sdata, "complete switching to center freq %d MHz",
- sdata->vif.bss_conf.chandef.chan->center_freq);
+ sdata->vif.bss_conf.chanreq.oper.chan->center_freq);
return 0;
}
@@ -1792,6 +1776,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
ifmsh->last_preq = jiffies;
ifmsh->next_perr = jiffies;
ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
+ ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
/* Allocate all mesh structures when creating the first mesh interface. */
if (!mesh_allocated)
ieee80211s_init();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 58c619874c..3f9664e4e0 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -94,6 +94,7 @@ enum mesh_deferred_task_flags {
* @is_root: the destination station of this path is a root node
* @is_gate: the destination station of this path is a mesh gate
* @path_change_count: the number of path changes to destination
+ * @fast_tx_check: timestamp of last fast-xmit enable attempt
*
*
* The dst address is unique in the mesh path table. Since the mesh_path is
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a6b62169f0..c0a5c75cdd 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
*/
void mesh_path_flush_pending(struct mesh_path *mpath)
{
+ struct ieee80211_sub_if_data *sdata = mpath->sdata;
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_preq_queue *preq, *tmp;
struct sk_buff *skb;
while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL)
mesh_path_discard_frame(mpath->sdata, skb);
+
+ spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
+ list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) {
+ if (ether_addr_equal(mpath->dst, preq->dst)) {
+ list_del(&preq->list);
+ kfree(preq);
+ --ifmsh->preq_queue_len;
+ }
+ }
+ spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
}
/**
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 28bf794f67..8f2b492a9f 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2019, 2021-2024 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
#include <linux/gfp.h>
@@ -163,7 +163,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
u16 ht_opmode;
bool non_ht_sta = false, ht20_sta = false;
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
@@ -196,7 +196,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
if (non_ht_sta)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED;
else if (ht20_sta &&
- sdata->vif.bss_conf.chandef.width > NL80211_CHAN_WIDTH_20)
+ sdata->vif.bss_conf.chanreq.oper.width > NL80211_CHAN_WIDTH_20)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ;
else
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE;
@@ -226,10 +226,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
- ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
- ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata,
- NL80211_IFTYPE_MESH_POINT);
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata);
+ ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
2 + /* capability info */
@@ -266,14 +264,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
struct ieee80211_supported_band *sband;
- enum nl80211_band band;
+ u32 rate_flags, basic_rates;
sband = ieee80211_get_sband(sdata);
if (!sband) {
err = -EINVAL;
goto free;
}
- band = sband->band;
/* capability info */
pos = skb_put_zero(skb, 2);
@@ -282,8 +279,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, 2);
put_unaligned_le16(sta->sta.aid, pos);
}
- if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
- ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
+
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
+ basic_rates = sdata->vif.bss_conf.basic_rates;
+
+ if (ieee80211_put_srates_elem(skb, sband, basic_rates,
+ rate_flags, 0,
+ WLAN_EID_SUPP_RATES) ||
+ ieee80211_put_srates_elem(skb, sband, basic_rates,
+ rate_flags, 0,
+ WLAN_EID_EXT_SUPP_RATES) ||
mesh_add_rsn_ie(sdata, skb) ||
mesh_add_meshid_ie(sdata, skb) ||
mesh_add_meshconf_ie(sdata, skb))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 94028b541b..ad2ce9c92b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -46,6 +46,8 @@
#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100)
#define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00
+#define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5)
+
static int max_nullfunc_tries = 2;
module_param(max_nullfunc_tries, int, 0644);
MODULE_PARM_DESC(max_nullfunc_tries,
@@ -92,84 +94,6 @@ MODULE_PARM_DESC(probe_wait_ms,
#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
/*
- * Extract from the given disabled subchannel bitmap (raw format
- * from the EHT Operation Element) the bits for the subchannel
- * we're using right now.
- */
-static u16
-ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper,
- struct cfg80211_chan_def *chandef, u16 bitmap)
-{
- struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional;
- struct cfg80211_chan_def ap_chandef = *chandef;
- u32 ap_center_freq, local_center_freq;
- u32 ap_bw, local_bw;
- int ap_start_freq, local_start_freq;
- u16 shift, mask;
-
- if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) ||
- !(eht_oper->params &
- IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT))
- return 0;
-
- /* set 160/320 supported to get the full AP definition */
- ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- true, true, &ap_chandef);
- ap_center_freq = ap_chandef.center_freq1;
- ap_bw = 20 * BIT(u8_get_bits(info->control,
- IEEE80211_EHT_OPER_CHAN_WIDTH));
- ap_start_freq = ap_center_freq - ap_bw / 2;
- local_center_freq = chandef->center_freq1;
- local_bw = 20 * BIT(ieee80211_chan_width_to_rx_bw(chandef->width));
- local_start_freq = local_center_freq - local_bw / 2;
- shift = (local_start_freq - ap_start_freq) / 20;
- mask = BIT(local_bw / 20) - 1;
-
- return (bitmap >> shift) & mask;
-}
-
-/*
- * Handle the puncturing bitmap, possibly downgrading bandwidth to get a
- * valid bitmap.
- */
-static void
-ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
- const struct ieee80211_eht_operation *eht_oper,
- u16 bitmap, u64 *changed)
-{
- struct cfg80211_chan_def *chandef = &link->conf->chandef;
- struct ieee80211_local *local = link->sdata->local;
- u16 extracted;
- u64 _changed = 0;
-
- if (!changed)
- changed = &_changed;
-
- while (chandef->width > NL80211_CHAN_WIDTH_40) {
- extracted =
- ieee80211_extract_dis_subch_bmap(eht_oper, chandef,
- bitmap);
-
- if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- chandef) &&
- !(bitmap && ieee80211_hw_check(&local->hw,
- DISALLOW_PUNCTURING)))
- break;
- link->u.mgd.conn_flags |=
- ieee80211_chandef_downgrade(chandef);
- *changed |= BSS_CHANGED_BANDWIDTH;
- }
-
- if (chandef->width <= NL80211_CHAN_WIDTH_40)
- extracted = 0;
-
- if (link->conf->eht_puncturing != extracted) {
- link->conf->eht_puncturing = extracted;
- *changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-}
-
-/*
* We can have multiple work items (and connection probing)
* scheduling this timer, but we need to take care to only
* reschedule it when it should fire _earlier_ than it was
@@ -223,77 +147,84 @@ static int ecw2cw(int ecw)
return (1 << ecw) - 1;
}
-static ieee80211_conn_flags_t
-ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_link_data *link,
- ieee80211_conn_flags_t conn_flags,
- struct ieee80211_supported_band *sband,
- struct ieee80211_channel *channel,
- u32 vht_cap_info,
- const struct ieee80211_ht_operation *ht_oper,
- const struct ieee80211_vht_operation *vht_oper,
- const struct ieee80211_he_operation *he_oper,
- const struct ieee80211_eht_operation *eht_oper,
- const struct ieee80211_s1g_oper_ie *s1g_oper,
- struct cfg80211_chan_def *chandef, bool tracking)
+static enum ieee80211_conn_mode
+ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ u32 vht_cap_info,
+ const struct ieee802_11_elems *elems,
+ bool ignore_ht_channel_mismatch,
+ const struct ieee80211_conn_settings *conn,
+ struct cfg80211_chan_def *chandef)
{
+ const struct ieee80211_ht_operation *ht_oper = elems->ht_operation;
+ const struct ieee80211_vht_operation *vht_oper = elems->vht_operation;
+ const struct ieee80211_he_operation *he_oper = elems->he_operation;
+ const struct ieee80211_eht_operation *eht_oper = elems->eht_operation;
+ struct ieee80211_supported_band *sband =
+ sdata->local->hw.wiphy->bands[channel->band];
struct cfg80211_chan_def vht_chandef;
- struct ieee80211_sta_ht_cap sta_ht_cap;
- ieee80211_conn_flags_t ret;
+ bool no_vht = false;
u32 ht_cfreq;
- memset(chandef, 0, sizeof(struct cfg80211_chan_def));
- chandef->chan = channel;
- chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef->center_freq1 = channel->center_freq;
- chandef->freq1_offset = channel->freq_offset;
+ *chandef = (struct cfg80211_chan_def) {
+ .chan = channel,
+ .width = NL80211_CHAN_WIDTH_20_NOHT,
+ .center_freq1 = channel->center_freq,
+ .freq1_offset = channel->freq_offset,
+ };
- if (channel->band == NL80211_BAND_6GHZ) {
- if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, eht_oper,
- chandef)) {
- mlme_dbg(sdata,
- "bad 6 GHz operation, disabling HT/VHT/HE/EHT\n");
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- } else {
- ret = 0;
- }
- vht_chandef = *chandef;
- goto out;
- } else if (sband->band == NL80211_BAND_S1GHZ) {
- if (!ieee80211_chandef_s1g_oper(s1g_oper, chandef)) {
+ /* get special S1G case out of the way */
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ if (!ieee80211_chandef_s1g_oper(elems->s1g_oper, chandef)) {
sdata_info(sdata,
"Missing S1G Operation Element? Trying operating == primary\n");
chandef->width = ieee80211_s1g_channel_width(channel);
}
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
- goto out;
+ return IEEE80211_CONN_MODE_S1G;
}
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+ /* get special 6 GHz case out of the way */
+ if (sband->band == NL80211_BAND_6GHZ) {
+ enum ieee80211_conn_mode mode = IEEE80211_CONN_MODE_EHT;
- if (!ht_oper || !sta_ht_cap.ht_supported) {
- mlme_dbg(sdata, "HT operation missing / HT not supported\n");
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ /* this is an error */
+ if (conn->mode < IEEE80211_CONN_MODE_HE)
+ return IEEE80211_CONN_MODE_LEGACY;
+
+ if (!elems->he_6ghz_capa || !elems->he_cap) {
+ sdata_info(sdata,
+ "HE 6 GHz AP is missing HE/HE 6 GHz band capability\n");
+ return IEEE80211_CONN_MODE_LEGACY;
+ }
+
+ if (!eht_oper || !elems->eht_cap) {
+ eht_oper = NULL;
+ mode = IEEE80211_CONN_MODE_HE;
+ }
+
+ if (!ieee80211_chandef_he_6ghz_oper(sdata->local, he_oper,
+ eht_oper, chandef)) {
+ sdata_info(sdata, "bad HE/EHT 6 GHz operation\n");
+ return IEEE80211_CONN_MODE_LEGACY;
+ }
+
+ return mode;
}
+ /* now we have the progression HT, VHT, ... */
+ if (conn->mode < IEEE80211_CONN_MODE_HT)
+ return IEEE80211_CONN_MODE_LEGACY;
+
+ if (!ht_oper || !elems->ht_cap_elem)
+ return IEEE80211_CONN_MODE_LEGACY;
+
chandef->width = NL80211_CHAN_WIDTH_20;
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
- if (!tracking && channel->center_freq != ht_cfreq) {
+ if (!ignore_ht_channel_mismatch && channel->center_freq != ht_cfreq) {
/*
* It's possible that some APs are confused here;
* Netgear WNDR3700 sometimes reports 4 higher than
@@ -305,36 +236,22 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
"Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
channel->center_freq, ht_cfreq,
ht_oper->primary_chan, channel->band);
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ return IEEE80211_CONN_MODE_LEGACY;
}
- /* check 40 MHz support, if we have it */
- if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
- ieee80211_chandef_ht_oper(ht_oper, chandef);
- } else {
- mlme_dbg(sdata, "40 MHz not supported\n");
- /* 40 MHz (and 80 MHz) must be supported for VHT */
- ret = IEEE80211_CONN_DISABLE_VHT;
- /* also mark 40 MHz disabled */
- ret |= IEEE80211_CONN_DISABLE_40MHZ;
- goto out;
- }
+ ieee80211_chandef_ht_oper(ht_oper, chandef);
- if (!vht_oper || !sband->vht_cap.vht_supported) {
- mlme_dbg(sdata, "VHT operation missing / VHT not supported\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
- }
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
+ return IEEE80211_CONN_MODE_HT;
vht_chandef = *chandef;
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- he_oper &&
- (le32_to_cpu(he_oper->he_oper_params) &
- IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
+
+ /*
+ * having he_cap/he_oper parsed out implies we're at
+ * least operating as HE STA
+ */
+ if (elems->he_cap && he_oper &&
+ he_oper->he_oper_params & cpu_to_le32(IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
struct ieee80211_vht_operation he_oper_vht_cap;
/*
@@ -347,253 +264,625 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
&he_oper_vht_cap, ht_oper,
&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HE))
- sdata_info(sdata,
- "HE AP VHT information is invalid, disabling HE\n");
- ret = IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "HE AP VHT information is invalid, disabling HE\n");
+ /* this will cause us to re-parse as VHT STA */
+ return IEEE80211_CONN_MODE_VHT;
}
+ } else if (!vht_oper || !elems->vht_cap_elem) {
+ if (sband->band == NL80211_BAND_5GHZ) {
+ sdata_info(sdata,
+ "VHT information is missing, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
+ }
+ no_vht = true;
+ } else if (sband->band == NL80211_BAND_2GHZ) {
+ no_vht = true;
} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
vht_cap_info,
vht_oper, ht_oper,
&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ sdata_info(sdata,
+ "AP VHT information is invalid, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
}
- if (!cfg80211_chandef_valid(&vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
+ sdata_info(sdata,
+ "AP VHT information doesn't match HT, disabling VHT\n");
+ return IEEE80211_CONN_MODE_HT;
}
- if (cfg80211_chandef_identical(chandef, &vht_chandef)) {
- ret = 0;
- goto out;
- }
+ *chandef = vht_chandef;
- if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
- sdata_info(sdata,
- "AP VHT information doesn't match HT, disabling VHT\n");
- ret = IEEE80211_CONN_DISABLE_VHT;
- goto out;
+ /* stick to current max mode if we or the AP don't have HE */
+ if (conn->mode < IEEE80211_CONN_MODE_HE ||
+ !elems->he_operation || !elems->he_cap) {
+ if (no_vht)
+ return IEEE80211_CONN_MODE_HT;
+ return IEEE80211_CONN_MODE_VHT;
}
- *chandef = vht_chandef;
+ /* stick to HE if we or the AP don't have EHT */
+ if (conn->mode < IEEE80211_CONN_MODE_EHT ||
+ !eht_oper || !elems->eht_cap)
+ return IEEE80211_CONN_MODE_HE;
/*
* handle the case that the EHT operation indicates that it holds EHT
* operation information (in case that the channel width differs from
* the channel width reported in HT/VHT/HE).
*/
- if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
+ if (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) {
struct cfg80211_chan_def eht_chandef = *chandef;
ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- eht_chandef.width ==
- NL80211_CHAN_WIDTH_160,
- false, &eht_chandef);
+ &eht_chandef);
+
+ eht_chandef.punctured =
+ ieee80211_eht_oper_dis_subchan_bitmap(eht_oper);
if (!cfg80211_chandef_valid(&eht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
- sdata_info(sdata,
- "AP EHT information is invalid, disabling EHT\n");
- ret = IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "AP EHT information is invalid, disabling EHT\n");
+ return IEEE80211_CONN_MODE_HE;
}
if (!cfg80211_chandef_compatible(chandef, &eht_chandef)) {
- if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
- sdata_info(sdata,
- "AP EHT information is incompatible, disabling EHT\n");
- ret = IEEE80211_CONN_DISABLE_EHT;
- goto out;
+ sdata_info(sdata,
+ "AP EHT information doesn't match HT/VHT/HE, disabling EHT\n");
+ return IEEE80211_CONN_MODE_HE;
}
*chandef = eht_chandef;
}
- ret = 0;
+ return IEEE80211_CONN_MODE_EHT;
+}
+
+static bool
+ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_he_cap_elem *he_cap,
+ const struct ieee80211_he_operation *he_op)
+{
+ struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
+ u16 mcs_80_map_tx, mcs_80_map_rx;
+ u16 ap_min_req_set;
+ int nss;
+
+ if (!he_cap)
+ return false;
+
+ /* mcs_nss is right after he_cap info */
+ he_mcs_nss_supp = (void *)(he_cap + 1);
+
+ mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
+ mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80);
+
+ /* P802.11-REVme/D0.3
+ * 27.1.1 Introduction to the HE PHY
+ * ...
+ * An HE STA shall support the following features:
+ * ...
+ * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all
+ * supported channel widths for HE SU PPDUs
+ */
+ if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) {
+ sdata_info(sdata,
+ "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n",
+ mcs_80_map_tx, mcs_80_map_rx);
+ return false;
+ }
+
+ if (!he_op)
+ return true;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
-out:
/*
- * When tracking the current AP, don't do any further checks if the
- * new chandef is identical to the one we're currently using for the
- * connection. This keeps us from playing ping-pong with regulatory,
- * without it the following can happen (for example):
- * - connect to an AP with 80 MHz, world regdom allows 80 MHz
- * - AP advertises regdom US
- * - CRDA loads regdom US with 80 MHz prohibited (old database)
- * - the code below detects an unsupported channel, downgrades, and
- * we disconnect from the AP in the caller
- * - disconnect causes CRDA to reload world regdomain and the game
- * starts anew.
- * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
+ */
+ if (!ap_min_req_set)
+ return true;
+
+ /* make sure the AP is consistent with itself
*
- * It seems possible that there are still scenarios with CSA or real
- * bandwidth changes where a this could happen, but those cases are
- * less common and wouldn't completely prevent using the AP.
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * A STA that is operating in an HE BSS shall be able to receive and
+ * transmit at each of the <HE-MCS, NSS> tuple values indicated by the
+ * Basic HE-MCS And NSS Set field of the HE Operation parameter of the
+ * MLME-START.request primitive and shall be able to receive at each of
+ * the <HE-MCS, NSS> tuple values indicated by the Supported HE-MCS and
+ * NSS Set field in the HE Capabilities parameter of the MLMESTART.request
+ * primitive
*/
- if (tracking &&
- cfg80211_chandef_identical(chandef, &link->conf->chandef))
- return ret;
+ for (nss = 8; nss > 0; nss--) {
+ u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ u8 ap_rx_val;
+ u8 ap_tx_val;
+
+ if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3;
+ ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3;
+
+ if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) {
+ sdata_info(sdata,
+ "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n",
+ nss, ap_rx_val, ap_rx_val, ap_op_val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_he_operation *he_op)
+{
+ const struct ieee80211_sta_he_cap *sta_he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ u16 ap_min_req_set;
+ int i;
- /* don't print the message below for VHT mismatch if VHT is disabled */
- if (ret & IEEE80211_CONN_DISABLE_VHT)
- vht_chandef = *chandef;
+ if (!sta_he_cap || !he_op)
+ return false;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
/*
- * Ignore the DISABLED flag when we're already connected and only
- * tracking the APs beacon for bandwidth changes - otherwise we
- * might get disconnected here if we connect to an AP, update our
- * regulatory information based on the AP's country IE and the
- * information we have is wrong/outdated and disables the channel
- * that we're actually using for the connection to the AP.
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
*/
- while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
- tracking ? 0 :
- IEEE80211_CHAN_DISABLED)) {
- if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
- ret = IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- break;
+ if (!ap_min_req_set)
+ return true;
+
+ /* Need to go over for 80MHz, 160MHz and for 80+80 */
+ for (i = 0; i < 3; i++) {
+ const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+ &sta_he_cap->he_mcs_nss_supp;
+ u16 sta_mcs_map_rx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+ u16 sta_mcs_map_tx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+ u8 nss;
+ bool verified = true;
+
+ /*
+ * For each band there is a maximum of 8 spatial streams
+ * possible. Each of the sta_mcs_map_* is a 16-bit struct built
+ * of 2 bits per NSS (1-8), with the values defined in enum
+ * ieee80211_he_mcs_support. Need to make sure STA TX and RX
+ * capabilities aren't less than the AP's minimum requirements
+ * for this HE BSS per SS.
+ * It is enough to find one such band that meets the reqs.
+ */
+ for (nss = 8; nss > 0; nss--) {
+ u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+ u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+ u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+ if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ /*
+ * Make sure the HE AP doesn't require MCSs that aren't
+ * supported by the client as required by spec
+ *
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * An HE STA shall not attempt to join * (MLME-JOIN.request primitive)
+ * a BSS, unless it supports (i.e., is able to both transmit and
+ * receive using) all of the <HE-MCS, NSS> tuples in the basic
+ * HE-MCS and NSS set.
+ */
+ if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+ verified = false;
+ break;
+ }
+ }
+
+ if (verified)
+ return true;
+ }
+
+ /* If here, STA doesn't meet AP's HE min requirements */
+ return false;
+}
+
+static u8
+ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap,
+ const struct ieee80211_sta_eht_cap *sta_eht_cap,
+ unsigned int idx, int bw)
+{
+ u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0];
+ u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0];
+
+ /* handle us being a 20 MHz-only EHT STA - with four values
+ * for MCS 0-7, 8-9, 10-11, 12-13.
+ */
+ if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL))
+ return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx];
+
+ /* the others have MCS 0-9 together, rather than separately from 0-7 */
+ if (idx > 0)
+ idx--;
+
+ switch (bw) {
+ case 0:
+ return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx];
+ case 1:
+ if (!(he_phy_cap0 &
+ (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)))
+ return 0xff; /* pass check */
+ return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx];
+ case 2:
+ if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ))
+ return 0xff; /* pass check */
+ return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx];
+ }
+
+ WARN_ON(1);
+ return 0;
+}
+
+static bool
+ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_eht_operation *eht_op)
+{
+ const struct ieee80211_sta_he_cap *sta_he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_sta_eht_cap *sta_eht_cap =
+ ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req;
+ unsigned int i;
+
+ if (!sta_he_cap || !sta_eht_cap || !eht_op)
+ return false;
+
+ req = &eht_op->basic_mcs_nss;
+
+ for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) {
+ u8 req_rx_nss, req_tx_nss;
+ unsigned int bw;
+
+ req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i],
+ IEEE80211_EHT_MCS_NSS_RX);
+ req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i],
+ IEEE80211_EHT_MCS_NSS_TX);
+
+ for (bw = 0; bw < 3; bw++) {
+ u8 have, have_rx_nss, have_tx_nss;
+
+ have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap,
+ sta_eht_cap,
+ i, bw);
+ have_rx_nss = u8_get_bits(have,
+ IEEE80211_EHT_MCS_NSS_RX);
+ have_tx_nss = u8_get_bits(have,
+ IEEE80211_EHT_MCS_NSS_TX);
+
+ if (req_rx_nss > have_rx_nss ||
+ req_tx_nss > have_tx_nss)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags)
+{
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy,
+ chandef, prohibited_flags))
+ return false;
+
+ if (chandef->punctured &&
+ ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING))
+ return false;
+
+ if (chandef->punctured && chandef->chan->band == NL80211_BAND_5GHZ &&
+ ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING_5GHZ))
+ return false;
+
+ return true;
+}
+
+static struct ieee802_11_elems *
+ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ struct cfg80211_bss *cbss, int link_id,
+ struct ieee80211_chan_req *chanreq)
+{
+ const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies);
+ struct ieee80211_bss *bss = (void *)cbss->priv;
+ struct ieee80211_channel *channel = cbss->channel;
+ struct ieee80211_elems_parse_params parse_params = {
+ .link_id = -1,
+ .from_ap = true,
+ .start = ies->data,
+ .len = ies->len,
+ };
+ struct ieee802_11_elems *elems;
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_chan_def ap_chandef;
+ enum ieee80211_conn_mode ap_mode;
+ int ret;
+
+again:
+ parse_params.mode = conn->mode;
+ elems = ieee802_11_parse_elems_full(&parse_params);
+ if (!elems)
+ return ERR_PTR(-ENOMEM);
+
+ ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info,
+ elems, false, conn, &ap_chandef);
+
+ /* this should be impossible since parsing depends on our mode */
+ if (WARN_ON(ap_mode > conn->mode)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ if (conn->mode != ap_mode) {
+ conn->mode = ap_mode;
+ kfree(elems);
+ goto again;
+ }
+
+ mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
+ cbss->bssid, ieee80211_conn_mode_str(ap_mode));
+
+ sband = sdata->local->hw.wiphy->bands[channel->band];
+
+ switch (channel->band) {
+ case NL80211_BAND_S1GHZ:
+ if (WARN_ON(ap_mode != IEEE80211_CONN_MODE_S1G)) {
+ ret = -EINVAL;
+ goto free;
}
+ return elems;
+ case NL80211_BAND_6GHZ:
+ if (ap_mode < IEEE80211_CONN_MODE_HE) {
+ sdata_info(sdata,
+ "Rejecting non-HE 6/7 GHz connection");
+ ret = -EINVAL;
+ goto free;
+ }
+ break;
+ default:
+ if (WARN_ON(ap_mode == IEEE80211_CONN_MODE_S1G)) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
- ret |= ieee80211_chandef_downgrade(chandef);
+ switch (ap_mode) {
+ case IEEE80211_CONN_MODE_S1G:
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto free;
+ case IEEE80211_CONN_MODE_LEGACY:
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
+ case IEEE80211_CONN_MODE_HT:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_40);
+ break;
+ case IEEE80211_CONN_MODE_VHT:
+ case IEEE80211_CONN_MODE_HE:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ break;
+ case IEEE80211_CONN_MODE_EHT:
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_320);
+ break;
}
- if (!he_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
- IEEE80211_CHAN_NO_HE))
- ret |= IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
+ chanreq->oper = ap_chandef;
- if (!eht_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
- IEEE80211_CHAN_NO_EHT))
- ret |= IEEE80211_CONN_DISABLE_EHT;
+ /* wider-bandwidth OFDMA is only done in EHT */
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW))
+ chanreq->ap = ap_chandef;
+ else
+ chanreq->ap.chan = NULL;
- if (chandef->width != vht_chandef.width && !tracking)
+ while (!ieee80211_chandef_usable(sdata, &chanreq->oper,
+ IEEE80211_CHAN_DISABLED)) {
+ if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ieee80211_chanreq_downgrade(chanreq, conn);
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_HE &&
+ !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper,
+ IEEE80211_CHAN_NO_HE)) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper,
+ IEEE80211_CHAN_NO_EHT)) {
+ conn->mode = IEEE80211_CONN_MODE_HE;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ }
+
+ if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode)
sdata_info(sdata,
- "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
+ "regulatory prevented using AP config, downgraded\n");
- WARN_ON_ONCE(!cfg80211_chandef_valid(chandef));
- return ret;
+ if (conn->mode >= IEEE80211_CONN_MODE_HE &&
+ (!ieee80211_verify_peer_he_mcs_support(sdata, (void *)elems->he_cap,
+ elems->he_operation) ||
+ !ieee80211_verify_sta_he_mcs_support(sdata, sband,
+ elems->he_operation))) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ sdata_info(sdata, "required MCSes not supported, disabling HE\n");
+ }
+
+ if (conn->mode >= IEEE80211_CONN_MODE_EHT &&
+ !ieee80211_verify_sta_eht_mcs_support(sdata, sband,
+ elems->eht_operation)) {
+ conn->mode = IEEE80211_CONN_MODE_HE;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_160);
+ sdata_info(sdata, "required MCSes not supported, disabling EHT\n");
+ }
+
+ /* the mode can only decrease, so this must terminate */
+ if (ap_mode != conn->mode) {
+ kfree(elems);
+ goto again;
+ }
+
+ mlme_link_id_dbg(sdata, link_id,
+ "connecting with %s mode, max bandwidth %d MHz\n",
+ ieee80211_conn_mode_str(conn->mode),
+ 20 * (1 << conn->bw_limit));
+
+ if (WARN_ON_ONCE(!cfg80211_chandef_valid(&chanreq->oper))) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ return elems;
+free:
+ kfree(elems);
+ return ERR_PTR(ret);
}
static int ieee80211_config_bw(struct ieee80211_link_data *link,
- const struct ieee80211_ht_cap *ht_cap,
- const struct ieee80211_vht_cap *vht_cap,
- const struct ieee80211_ht_operation *ht_oper,
- const struct ieee80211_vht_operation *vht_oper,
- const struct ieee80211_he_operation *he_oper,
- const struct ieee80211_eht_operation *eht_oper,
- const struct ieee80211_s1g_oper_ie *s1g_oper,
- const u8 *bssid, u64 *changed)
+ struct ieee802_11_elems *elems,
+ bool update, u64 *changed)
{
+ struct ieee80211_channel *channel = link->conf->chanreq.oper.chan;
struct ieee80211_sub_if_data *sdata = link->sdata;
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_channel *chan = link->conf->chandef.chan;
- struct ieee80211_supported_band *sband =
- local->hw.wiphy->bands[chan->band];
- struct cfg80211_chan_def chandef;
- u16 ht_opmode;
- ieee80211_conn_flags_t flags;
+ struct ieee80211_chan_req chanreq = {};
+ enum ieee80211_conn_mode ap_mode;
u32 vht_cap_info = 0;
+ u16 ht_opmode;
int ret;
- /* if HT was/is disabled, don't track any bandwidth changes */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT || !ht_oper)
+ /* don't track any bandwidth changes in legacy/S1G modes */
+ if (link->u.mgd.conn.mode == IEEE80211_CONN_MODE_LEGACY ||
+ link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G)
return 0;
- /* don't check VHT if we associated as non-VHT station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
- vht_oper = NULL;
+ if (elems->vht_cap_elem)
+ vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+
+ ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info,
+ elems, true, &link->u.mgd.conn,
+ &chanreq.ap);
- /* don't check HE if we associated as non-HE station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE ||
- !ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) {
- he_oper = NULL;
- eht_oper = NULL;
+ if (ap_mode != link->u.mgd.conn.mode) {
+ link_info(link,
+ "AP appears to change mode (expected %s, found %s), disconnect\n",
+ ieee80211_conn_mode_str(link->u.mgd.conn.mode),
+ ieee80211_conn_mode_str(ap_mode));
+ return -EINVAL;
}
- /* don't check EHT if we associated as non-EHT station */
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT ||
- !ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif))
- eht_oper = NULL;
+ chanreq.oper = chanreq.ap;
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT ||
+ sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)
+ chanreq.ap.chan = NULL;
/*
- * if bss configuration changed store the new one -
+ * if HT operation mode changed store the new one -
* this may be applicable even if channel is identical
*/
- ht_opmode = le16_to_cpu(ht_oper->operation_mode);
- if (link->conf->ht_operation_mode != ht_opmode) {
- *changed |= BSS_CHANGED_HT;
- link->conf->ht_operation_mode = ht_opmode;
+ if (elems->ht_operation) {
+ ht_opmode = le16_to_cpu(elems->ht_operation->operation_mode);
+ if (link->conf->ht_operation_mode != ht_opmode) {
+ *changed |= BSS_CHANGED_HT;
+ link->conf->ht_operation_mode = ht_opmode;
+ }
}
- if (vht_cap)
- vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info);
-
- /* calculate new channel (type) based on HT/VHT/HE operation IEs */
- flags = ieee80211_determine_chantype(sdata, link,
- link->u.mgd.conn_flags,
- sband, chan, vht_cap_info,
- ht_oper, vht_oper,
- he_oper, eht_oper,
- s1g_oper, &chandef, true);
-
/*
* Downgrade the new channel if we associated with restricted
- * capabilities. For example, if we associated as a 20 MHz STA
- * to a 40 MHz AP (due to regulatory, capabilities or config
- * reasons) then switching to a 40 MHz channel now won't do us
- * any good -- we couldn't use it with the AP.
+ * bandwidth capabilities. For example, if we associated as a
+ * 20 MHz STA to a 40 MHz AP (due to regulatory, capabilities
+ * or config reasons) then switching to a 40 MHz channel now
+ * won't do us any good -- we couldn't use it with the AP.
*/
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
- chandef.width == NL80211_CHAN_WIDTH_80P80)
- flags |= ieee80211_chandef_downgrade(&chandef);
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
- chandef.width == NL80211_CHAN_WIDTH_160)
- flags |= ieee80211_chandef_downgrade(&chandef);
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_40MHZ &&
- chandef.width > NL80211_CHAN_WIDTH_20)
- flags |= ieee80211_chandef_downgrade(&chandef);
-
- if (cfg80211_chandef_identical(&chandef, &link->conf->chandef))
+ while (link->u.mgd.conn.bw_limit <
+ ieee80211_min_bw_limit_from_chandef(&chanreq.oper))
+ ieee80211_chandef_downgrade(&chanreq.oper, NULL);
+
+ if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq))
return 0;
link_info(link,
- "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n",
- link->u.mgd.bssid, chandef.chan->center_freq,
- chandef.chan->freq_offset, chandef.width,
- chandef.center_freq1, chandef.freq1_offset,
- chandef.center_freq2);
-
- if (flags != (link->u.mgd.conn_flags &
- (IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT |
- IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ |
- IEEE80211_CONN_DISABLE_320MHZ)) ||
- !cfg80211_chandef_valid(&chandef)) {
+ "AP %pM changed bandwidth, new used config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n",
+ link->u.mgd.bssid, chanreq.oper.chan->center_freq,
+ chanreq.oper.chan->freq_offset, chanreq.oper.width,
+ chanreq.oper.center_freq1, chanreq.oper.freq1_offset,
+ chanreq.oper.center_freq2);
+
+ if (!cfg80211_chandef_valid(&chanreq.oper)) {
sdata_info(sdata,
- "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
- link->u.mgd.bssid, flags, ifmgd->flags);
+ "AP %pM changed caps/bw in a way we can't support - disconnect\n",
+ link->u.mgd.bssid);
return -EINVAL;
}
- ret = ieee80211_link_change_bandwidth(link, &chandef, changed);
+ if (!update) {
+ link->conf->chanreq = chanreq;
+ return 0;
+ }
+ /*
+ * We're tracking the current AP here, so don't do any further checks
+ * here. This keeps us from playing ping-pong with regulatory, without
+ * it the following can happen (for example):
+ * - connect to an AP with 80 MHz, world regdom allows 80 MHz
+ * - AP advertises regdom US
+ * - CRDA loads regdom US with 80 MHz prohibited (old database)
+ * - we detect an unsupported channel and disconnect
+ * - disconnect causes CRDA to reload world regdomain and the game
+ * starts anew.
+ * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+ *
+ * It seems possible that there are still scenarios with CSA or real
+ * bandwidth changes where a this could happen, but those cases are
+ * less common and wouldn't completely prevent using the AP.
+ */
+
+ ret = ieee80211_link_change_chanreq(link, &chanreq, changed);
if (ret) {
sdata_info(sdata,
"AP %pM changed bandwidth to incompatible one - disconnect\n",
@@ -612,7 +901,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
enum ieee80211_smps_mode smps,
- ieee80211_conn_flags_t conn_flags)
+ const struct ieee80211_conn_settings *conn)
{
u8 *pos;
u32 flags = channel->flags;
@@ -647,7 +936,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
* capable of 40 MHz -- some broken APs will never fall
* back to trying to transmit in 20 MHz.
*/
- if (conn_flags & IEEE80211_CONN_DISABLE_40MHZ) {
+ if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_20) {
cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
cap &= ~IEEE80211_HT_CAP_SGI_40;
}
@@ -686,7 +975,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
struct ieee80211_supported_band *sband,
struct ieee80211_vht_cap *ap_vht_cap,
- ieee80211_conn_flags_t conn_flags)
+ const struct ieee80211_conn_settings *conn)
{
struct ieee80211_local *local = sdata->local;
u8 *pos;
@@ -703,16 +992,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
/* determine capability flags */
cap = vht_cap.cap;
- if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ) {
- u32 bw = cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
-
- cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
- if (bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ ||
- bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
- cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
- }
-
- if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ) {
+ if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_80) {
cap &= ~IEEE80211_VHT_CAP_SHORT_GI_160;
cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
}
@@ -769,79 +1049,12 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
return mu_mimo_owner;
}
-/* This function determines HE capability flags for the association
- * and builds the IE.
- */
-static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- struct ieee80211_supported_band *sband,
- enum ieee80211_smps_mode smps_mode,
- ieee80211_conn_flags_t conn_flags)
-{
- u8 *pos, *pre_he_pos;
- const struct ieee80211_sta_he_cap *he_cap;
- u8 he_cap_size;
-
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- if (WARN_ON(!he_cap))
- return;
-
- /* get a max size estimate */
- he_cap_size =
- 2 + 1 + sizeof(he_cap->he_cap_elem) +
- ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
- ieee80211_he_ppe_size(he_cap->ppe_thres[0],
- he_cap->he_cap_elem.phy_cap_info);
- pos = skb_put(skb, he_cap_size);
- pre_he_pos = pos;
- pos = ieee80211_ie_build_he_cap(conn_flags,
- pos, he_cap, pos + he_cap_size);
- /* trim excess if any */
- skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos));
-
- ieee80211_ie_build_he_6ghz_cap(sdata, smps_mode, skb);
-}
-
-static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- struct ieee80211_supported_band *sband)
-{
- u8 *pos;
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
- u8 eht_cap_size;
-
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
-
- /*
- * EHT capabilities element is only added if the HE capabilities element
- * was added so assume that 'he_cap' is valid and don't check it.
- */
- if (WARN_ON(!he_cap || !eht_cap))
- return;
-
- eht_cap_size =
- 2 + 1 + sizeof(eht_cap->eht_cap_elem) +
- ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem,
- false) +
- ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
- pos = skb_put(skb, eht_cap_size);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + eht_cap_size,
- false);
-}
-
static void ieee80211_assoc_add_rates(struct sk_buff *skb,
enum nl80211_chan_width width,
struct ieee80211_supported_band *sband,
struct ieee80211_mgd_assoc_data *assoc_data)
{
- unsigned int rates_len, supp_rates_len;
- u32 rates = 0;
- int i, count;
- u8 *pos;
+ u32 rates;
if (assoc_data->supp_rates_len) {
/*
@@ -850,53 +1063,23 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb,
* in the association request (e.g. D-Link DAP 1353 in
* b-only mode)...
*/
- rates_len = ieee80211_parse_bitrates(width, sband,
- assoc_data->supp_rates,
- assoc_data->supp_rates_len,
- &rates);
+ ieee80211_parse_bitrates(width, sband,
+ assoc_data->supp_rates,
+ assoc_data->supp_rates_len,
+ &rates);
} else {
/*
* In case AP not provide any supported rates information
* before association, we send information element(s) with
* all rates that we support.
*/
- rates_len = sband->n_bitrates;
- for (i = 0; i < sband->n_bitrates; i++)
- rates |= BIT(i);
- }
-
- supp_rates_len = rates_len;
- if (supp_rates_len > 8)
- supp_rates_len = 8;
-
- pos = skb_put(skb, supp_rates_len + 2);
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = supp_rates_len;
-
- count = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
- if (BIT(i) & rates) {
- int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = (u8)rate;
- if (++count == 8)
- break;
- }
+ rates = ~0;
}
- if (rates_len > count) {
- pos = skb_put(skb, rates_len - count + 2);
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = rates_len - count;
-
- for (i++; i < sband->n_bitrates; i++) {
- if (BIT(i) & rates) {
- int rate;
-
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = (u8)rate;
- }
- }
- }
+ ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates,
+ WLAN_EID_SUPP_RATES);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates,
+ WLAN_EID_EXT_SUPP_RATES);
}
static size_t ieee80211_add_before_ht_elems(struct sk_buff *skb,
@@ -1133,11 +1316,11 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
offset);
if (sband->band != NL80211_BAND_6GHZ &&
- !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HT) {
ieee80211_add_ht_ie(sdata, skb,
assoc_data->link[link_id].ap_ht_param,
sband, chan, smps_mode,
- assoc_data->link[link_id].conn_flags);
+ &assoc_data->link[link_id].conn);
ADD_PRESENT_ELEM(WLAN_EID_HT_CAPABILITY);
}
@@ -1147,37 +1330,28 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
offset);
if (sband->band != NL80211_BAND_6GHZ &&
- !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_VHT &&
+ sband->vht_cap.vht_supported) {
bool mu_mimo_owner =
ieee80211_add_vht_ie(sdata, skb, sband,
&assoc_data->link[link_id].ap_vht_cap,
- assoc_data->link[link_id].conn_flags);
+ &assoc_data->link[link_id].conn);
if (link)
link->conf->mu_mimo_owner = mu_mimo_owner;
ADD_PRESENT_ELEM(WLAN_EID_VHT_CAPABILITY);
}
- /*
- * If AP doesn't support HT, mark HE and EHT as disabled.
- * If on the 5GHz band, make sure it supports VHT.
- */
- if (assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT ||
- (sband->band == NL80211_BAND_5GHZ &&
- assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT))
- assoc_data->link[link_id].conn_flags |=
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
-
/* if present, add any custom IEs that go before HE */
offset = ieee80211_add_before_he_elems(skb, extra_elems,
extra_elems_len,
offset);
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HE)) {
- ieee80211_add_he_ie(sdata, skb, sband, smps_mode,
- assoc_data->link[link_id].conn_flags);
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HE) {
+ ieee80211_put_he_cap(skb, sdata, sband,
+ &assoc_data->link[link_id].conn);
ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_HE_CAPABILITY);
+ ieee80211_put_he_6ghz_cap(skb, sdata, smps_mode);
}
/*
@@ -1185,7 +1359,7 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
* calling ieee80211_assoc_add_ml_elem(), so add this one if
* we're going to put it after the ML element
*/
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT)
ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_EHT_CAPABILITY);
if (link_id == assoc_data->assoc_link_id)
@@ -1195,8 +1369,9 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
/* crash if somebody gets it wrong */
present_elems = NULL;
- if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
- ieee80211_add_eht_ie(sdata, skb, sband);
+ if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT)
+ ieee80211_put_eht_cap(skb, sdata, sband,
+ &assoc_data->link[link_id].conn);
if (sband->band == NL80211_BAND_S1GHZ) {
ieee80211_add_aid_request_ie(sdata, skb);
@@ -1206,9 +1381,6 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
- if (link)
- link->u.mgd.conn_flags = assoc_data->link[link_id].conn_flags;
-
return offset;
}
@@ -1318,8 +1490,6 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EML_CAPA);
skb_put_data(skb, &eml_capa, sizeof(eml_capa));
}
- /* need indication from userspace to support this */
- mld_capa_ops &= ~cpu_to_le16(IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP);
skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops));
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
@@ -1499,7 +1669,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
/* Set MBSSID support for HE AP if needed */
if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE &&
ext_capa && ext_capa->datalen >= 3)
ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
@@ -1544,7 +1714,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* for some reason check it and want it to be set, set the bit for all
* pre-EHT connections as we used to do.
*/
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT)
capab |= WLAN_CAPABILITY_ESS;
/* add the elements for the assoc (main) link */
@@ -1741,8 +1911,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
return;
}
- if (!cfg80211_chandef_identical(&link->conf->chandef,
- &link->csa_chandef)) {
+ if (!ieee80211_chanreq_identical(&link->conf->chanreq,
+ &link->csa_chanreq)) {
sdata_info(sdata,
"failed to finalize channel switch, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
@@ -1767,19 +1937,15 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
WARN_ON(!link->conf->csa_active);
- if (link->csa_block_tx) {
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- link->csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
link->conf->csa_active = false;
+ link->u.mgd.csa_blocked_tx = false;
link->u.mgd.csa_waiting_bcn = false;
- /*
- * If the CSA IE is still present on the beacon after the switch,
- * we need to consider it as a new CSA (possibly to self).
- */
- link->u.mgd.beacon_crc_valid = false;
ret = drv_post_channel_switch(link);
if (ret) {
@@ -1790,8 +1956,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef,
- link->link_id, 0);
+ cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper,
+ link->link_id);
}
void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
@@ -1838,14 +2004,16 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
ieee80211_link_unreserve_chanctx(link);
- if (link->csa_block_tx)
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_queues = false;
+ }
- link->csa_block_tx = false;
link->conf->csa_active = false;
+ link->u.mgd.csa_blocked_tx = false;
- drv_abort_channel_switch(sdata);
+ drv_abort_channel_switch(link);
}
static void
@@ -1857,12 +2025,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct cfg80211_bss *cbss = link->u.mgd.bss;
+ struct cfg80211_bss *cbss = link->conf->bss;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
enum nl80211_band current_band;
struct ieee80211_csa_ie csa_ie;
- struct ieee80211_channel_switch ch_switch;
+ struct ieee80211_channel_switch ch_switch = {
+ .link_id = link->link_id,
+ };
struct ieee80211_bss *bss;
unsigned long timeout;
int res;
@@ -1876,14 +2046,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
bss = (void *)cbss->priv;
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
bss->vht_cap_info,
- link->u.mgd.conn_flags,
+ &link->u.mgd.conn,
link->u.mgd.bssid, &csa_ie);
if (!res) {
ch_switch.timestamp = timestamp;
ch_switch.device_timestamp = device_timestamp;
ch_switch.block_tx = csa_ie.mode;
- ch_switch.chandef = csa_ie.chandef;
+ ch_switch.chandef = csa_ie.chanreq.oper;
ch_switch.count = csa_ie.count;
ch_switch.delay = csa_ie.max_switch_time;
}
@@ -1891,46 +2061,62 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
if (res < 0)
goto drop_connection;
- if (beacon && link->conf->csa_active &&
- !link->u.mgd.csa_waiting_bcn) {
- if (res)
+ if (link->conf->csa_active) {
+ /* already processing - disregard action frames */
+ if (!beacon)
+ return;
+
+ if (link->u.mgd.csa_waiting_bcn) {
+ ieee80211_chswitch_post_beacon(link);
+ /*
+ * If the CSA IE is still present in the beacon after
+ * the switch, we need to consider it as a new CSA
+ * (possibly to self) - this happens by not returning
+ * here so we'll get to the check below.
+ */
+ } else if (res) {
ieee80211_sta_abort_chanswitch(link);
- else
+ return;
+ } else {
drv_channel_switch_rx_beacon(sdata, &ch_switch);
- return;
- } else if (link->conf->csa_active || res) {
- /* disregard subsequent announcements if already processing */
- return;
+ return;
+ }
}
- if (link->conf->chandef.chan->band !=
- csa_ie.chandef.chan->band) {
+ /* nothing to do at all - no active CSA nor a new one */
+ if (res)
+ return;
+
+ if (link->conf->chanreq.oper.chan->band !=
+ csa_ie.chanreq.oper.chan->band) {
sdata_info(sdata,
"AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
link->u.mgd.bssid,
- csa_ie.chandef.chan->center_freq,
- csa_ie.chandef.width, csa_ie.chandef.center_freq1,
- csa_ie.chandef.center_freq2);
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
- if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
+ if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chanreq.oper,
IEEE80211_CHAN_DISABLED)) {
sdata_info(sdata,
"AP %pM switches to unsupported channel "
"(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
"disconnecting\n",
link->u.mgd.bssid,
- csa_ie.chandef.chan->center_freq,
- csa_ie.chandef.chan->freq_offset,
- csa_ie.chandef.width, csa_ie.chandef.center_freq1,
- csa_ie.chandef.freq1_offset,
- csa_ie.chandef.center_freq2);
+ csa_ie.chanreq.oper.chan->center_freq,
+ csa_ie.chanreq.oper.chan->freq_offset,
+ csa_ie.chanreq.oper.width,
+ csa_ie.chanreq.oper.center_freq1,
+ csa_ie.chanreq.oper.freq1_offset,
+ csa_ie.chanreq.oper.center_freq2);
goto drop_connection;
}
- if (cfg80211_chandef_identical(&csa_ie.chandef,
- &link->conf->chandef) &&
+ if (cfg80211_chandef_identical(&csa_ie.chanreq.oper,
+ &link->conf->chanreq.oper) &&
(!csa_ie.mode || !beacon)) {
if (link->u.mgd.csa_ignored_same_chan)
return;
@@ -1942,12 +2128,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
/*
- * Drop all TDLS peers - either we disconnect or move to a different
- * channel from this point on. There's no telling what our peer will do.
+ * Drop all TDLS peers on the affected link - either we disconnect or
+ * move to a different channel from this point on. There's no telling
+ * what our peer will do.
* The TDLS WIDER_BW scenario is also problematic, as peers might now
* have an incompatible wider chandef.
*/
- ieee80211_teardown_tdls_peers(sdata);
+ ieee80211_teardown_tdls_peers(link);
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
@@ -1959,8 +2146,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (local->use_chanctx &&
- !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
+ if (!ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
sdata_info(sdata,
"driver doesn't support chan-switch with channel contexts\n");
goto drop_connection;
@@ -1972,7 +2158,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
goto drop_connection;
}
- res = ieee80211_link_reserve_chanctx(link, &csa_ie.chandef,
+ res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq,
chanctx->mode, false);
if (res) {
sdata_info(sdata,
@@ -1982,18 +2168,21 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
link->conf->csa_active = true;
- link->csa_chandef = csa_ie.chandef;
- link->csa_block_tx = csa_ie.mode;
+ link->csa_chanreq = csa_ie.chanreq;
link->u.mgd.csa_ignored_same_chan = false;
link->u.mgd.beacon_crc_valid = false;
+ link->u.mgd.csa_blocked_tx = csa_ie.mode;
- if (link->csa_block_tx)
+ if (csa_ie.mode &&
+ !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_blocked_queues = true;
+ }
- cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef,
+ cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper,
link->link_id, csa_ie.count,
- csa_ie.mode, 0);
+ csa_ie.mode);
if (local->ops->channel_switch) {
/* use driver's channel switch callback */
@@ -2017,7 +2206,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
* reset when the disconnection worker runs.
*/
link->conf->csa_active = true;
- link->csa_block_tx = csa_ie.mode;
+ link->u.mgd.csa_blocked_tx = csa_ie.mode;
+ sdata->csa_blocked_queues =
+ csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA);
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
@@ -2414,7 +2605,7 @@ void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
struct ieee80211_link_data *link =
container_of(work, struct ieee80211_link_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = link->conf->chandef;
+ struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
struct ieee80211_sub_if_data *sdata = link->sdata;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -2769,7 +2960,7 @@ static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link,
ieee80211_check_rate_mask(link);
- link->u.mgd.bss = cbss;
+ link->conf->bss = cbss;
memcpy(link->u.mgd.bssid, cbss->bssid, ETH_ALEN);
if (sdata->vif.p2p ||
@@ -2917,7 +3108,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ifmgd->associated = false;
/* other links will be destroyed */
- sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.conf->bss = NULL;
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -2992,7 +3183,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->vif.cfg.ssid_len = 0;
/* remove AP and TDLS peers */
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
/* finally reset all BSS / config parameters */
if (!ieee80211_vif_is_mld(&sdata->vif))
@@ -3058,7 +3249,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->deflink.u.mgd.disable_wmm_tracking = false;
ifmgd->flags = 0;
- sdata->deflink.u.mgd.conn_flags = 0;
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
struct ieee80211_link_data *link;
@@ -3070,27 +3260,47 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
}
sdata->vif.bss_conf.csa_active = false;
+ sdata->deflink.u.mgd.csa_blocked_tx = false;
sdata->deflink.u.mgd.csa_waiting_bcn = false;
sdata->deflink.u.mgd.csa_ignored_same_chan = false;
- if (sdata->deflink.csa_block_tx) {
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
/* existing TX TSPEC sessions no longer exist */
memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec));
wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk);
+ sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP;
sdata->vif.bss_conf.pwr_reduction = 0;
sdata->vif.bss_conf.tx_pwr_env_num = 0;
memset(sdata->vif.bss_conf.tx_pwr_env, 0,
sizeof(sdata->vif.bss_conf.tx_pwr_env));
+ sdata->vif.cfg.eml_cap = 0;
+ sdata->vif.cfg.eml_med_sync_delay = 0;
+ sdata->vif.cfg.mld_capa_op = 0;
+
memset(&sdata->u.mgd.ttlm_info, 0,
sizeof(sdata->u.mgd.ttlm_info));
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
+
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->neg_ttlm_timeout_work);
+
+ sdata->u.mgd.removed_links = 0;
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.ml_reconf_work);
+
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->teardown_ttlm_work);
+
ieee80211_vif_set_links(sdata, 0, 0);
+
+ ifmgd->mcast_seq_last = IEEE80211_SN_MODULO;
}
static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
@@ -3238,7 +3448,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
sdata->vif.cfg.ssid,
sdata->vif.cfg.ssid_len,
- sdata->deflink.u.mgd.bss->channel);
+ sdata->deflink.conf->bss->channel);
}
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
@@ -3321,7 +3531,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
return NULL;
if (ifmgd->associated)
- cbss = sdata->deflink.u.mgd.bss;
+ cbss = sdata->deflink.conf->bss;
else if (ifmgd->auth_data)
cbss = ifmgd->auth_data->bss;
else if (ifmgd->assoc_data && ifmgd->assoc_data->link[0].bss)
@@ -3371,16 +3581,32 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
- bool tx;
+ bool tx = false;
lockdep_assert_wiphy(local->hw.wiphy);
if (!ifmgd->associated)
return;
- /* in MLO assume we have a link where we can TX the frame */
- tx = ieee80211_vif_is_mld(&sdata->vif) ||
- !sdata->deflink.csa_block_tx;
+ /* only transmit if we have a link that makes that worthwhile */
+ for (unsigned int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link;
+
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON_ONCE(!link))
+ continue;
+
+ if (link->u.mgd.csa_blocked_tx)
+ continue;
+
+ tx = true;
+ break;
+ }
if (!ifmgd->driver_disconnect) {
unsigned int link_id;
@@ -3400,8 +3626,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
continue;
- cfg80211_unlink_bss(local->hw.wiphy, link->u.mgd.bss);
- link->u.mgd.bss = NULL;
+ cfg80211_unlink_bss(local->hw.wiphy, link->conf->bss);
+ link->conf->bss = NULL;
}
}
@@ -3413,10 +3639,11 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
/* the other links will be destroyed */
sdata->vif.bss_conf.csa_active = false;
sdata->deflink.u.mgd.csa_waiting_bcn = false;
- if (sdata->deflink.csa_block_tx) {
+ sdata->deflink.u.mgd.csa_blocked_tx = false;
+ if (sdata->csa_blocked_queues) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ sdata->csa_blocked_queues = false;
}
ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
@@ -3518,7 +3745,6 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
sta_info_destroy_addr(sdata, auth_data->ap_addr);
/* other links are destroyed */
- sdata->deflink.u.mgd.conn_flags = 0;
eth_zero_addr(sdata->deflink.u.mgd.bssid);
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
BSS_CHANGED_BSSID);
@@ -3556,7 +3782,6 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.timer);
sta_info_destroy_addr(sdata, assoc_data->ap_addr);
- sdata->deflink.u.mgd.conn_flags = 0;
eth_zero_addr(sdata->deflink.u.mgd.bssid);
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
BSS_CHANGED_BSSID);
@@ -4006,11 +4231,13 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
struct ieee80211_local *local = sdata->local;
unsigned int link_id = link->link_id;
struct ieee80211_elems_parse_params parse_params = {
+ .mode = link->u.mgd.conn.mode,
.start = elem_start,
.len = elem_len,
.link_id = link_id == assoc_data->assoc_link_id ? -1 : link_id,
.from_ap = true,
};
+ bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
bool is_s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
const struct cfg80211_bss_ies *bss_ies = NULL;
@@ -4034,15 +4261,17 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
assoc_data->link[link_id].status = WLAN_STATUS_SUCCESS;
if (elems->ml_basic) {
- if (!(elems->ml_basic->control &
- cpu_to_le16(IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))) {
+ int bss_param_ch_cnt =
+ ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic);
+
+ if (bss_param_ch_cnt < 0) {
ret = false;
goto out;
}
- link->u.mgd.bss_param_ch_cnt =
- ieee80211_mle_get_bss_param_ch_cnt(elems->ml_basic);
+ link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt;
}
- } else if (!elems->prof ||
+ } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC ||
+ !elems->prof ||
!(elems->prof->control & prof_bss_param_ch_present)) {
ret = false;
goto out;
@@ -4086,9 +4315,9 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
if (!is_6ghz &&
((assoc_data->wmm && !elems->wmm_param) ||
- (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
+ (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
+ (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -4125,25 +4354,25 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
* have to include the IEs in the (re)association response.
*/
if (!elems->ht_cap_elem && bss_elems->ht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) {
elems->ht_cap_elem = bss_elems->ht_cap_elem;
sdata_info(sdata,
"AP bug: HT capability missing from AssocResp\n");
}
if (!elems->ht_operation && bss_elems->ht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) {
elems->ht_operation = bss_elems->ht_operation;
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
elems->vht_cap_elem = bss_elems->vht_cap_elem;
sdata_info(sdata,
"AP bug: VHT capa missing from AssocResp\n");
}
if (!elems->vht_operation && bss_elems->vht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
elems->vht_operation = bss_elems->vht_operation;
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
@@ -4155,8 +4384,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
/*
* We previously checked these in the beacon/probe response, so
* they should be present here. This is just a safety net.
+ * Note that the ieee80211_config_bw() below would also check
+ * for this (and more), but this has better error reporting.
*/
- if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
+ if (!is_6ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
sdata_info(sdata,
"HT AP is missing WMM params or HT capability/operation\n");
@@ -4164,7 +4395,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
goto out;
}
- if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
+ if (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)) {
sdata_info(sdata,
"VHT AP is missing VHT capability/operation\n");
@@ -4172,36 +4403,28 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
goto out;
}
- if (is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- !elems->he_6ghz_capa) {
- sdata_info(sdata,
- "HE 6 GHz AP is missing HE 6 GHz band capability\n");
+ /* check/update if AP changed anything in assoc response vs. scan */
+ if (ieee80211_config_bw(link, elems,
+ link_id == assoc_data->assoc_link_id,
+ changed)) {
ret = false;
goto out;
}
- if (WARN_ON(!link->conf->chandef.chan)) {
- ret = false;
- goto out;
- }
- sband = local->hw.wiphy->bands[link->conf->chandef.chan->band];
-
- if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
- (!elems->he_cap || !elems->he_operation)) {
- sdata_info(sdata,
- "HE AP is missing HE capability/operation\n");
+ if (WARN_ON(!link->conf->chanreq.oper.chan)) {
ret = false;
goto out;
}
+ sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band];
/* Set up internal HT/VHT capabilities */
- if (elems->ht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
+ if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT)
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
elems->ht_cap_elem,
link_sta);
if (elems->vht_cap_elem &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
const struct ieee80211_vht_cap *bss_vht_cap = NULL;
const struct cfg80211_bss_ies *ies;
@@ -4228,14 +4451,43 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
rcu_read_unlock();
}
- if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ if (elems->he_operation &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE &&
elems->he_cap) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
elems->he_6ghz_capa,
link_sta);
+ he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation);
+
+ if (is_6ghz && he_6ghz_oper) {
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
+ bss_conf->power_type = IEEE80211_REG_LPI_AP;
+ break;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ bss_conf->power_type = IEEE80211_REG_SP_AP;
+ break;
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ bss_conf->power_type = IEEE80211_REG_VLP_AP;
+ break;
+ default:
+ bss_conf->power_type = IEEE80211_REG_UNSET_AP;
+ break;
+ }
+ } else if (is_6ghz) {
+ link_info(link,
+ "HE 6 GHz operation missing (on %d MHz), expect issues\n",
+ bss_conf->chanreq.oper.chan->center_freq);
+ }
+
bss_conf->he_support = link_sta->pub->he_cap.has_he;
if (elems->rsnx && elems->rsnx_len &&
(elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
@@ -4249,7 +4501,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
link_sta, elems);
if (elems->eht_operation && elems->eht_cap &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_EHT) {
ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
@@ -4258,7 +4510,6 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
link_sta);
bss_conf->eht_support = link_sta->pub->eht_cap.has_eht;
- *changed |= BSS_CHANGED_EHT_PUNCTURING;
} else {
bss_conf->eht_support = false;
}
@@ -4456,7 +4707,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
bool support_160;
u8 chains = 1;
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HT)
return chains;
ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY);
@@ -4469,7 +4720,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
*/
}
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_VHT)
return chains;
vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
@@ -4488,7 +4739,7 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
chains = max(chains, nss);
}
- if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HE)
return chains;
ies = rcu_dereference(cbss->ies);
@@ -4539,533 +4790,331 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
return chains;
}
-static bool
-ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_bss_ies *ies,
- const struct ieee80211_he_operation *he_op)
+static void
+ieee80211_determine_our_sta_mode(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_assoc_request *req,
+ bool wmm_used, int link_id,
+ struct ieee80211_conn_settings *conn)
{
- const struct element *he_cap_elem;
- const struct ieee80211_he_cap_elem *he_cap;
- struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
- u16 mcs_80_map_tx, mcs_80_map_rx;
- u16 ap_min_req_set;
- int mcs_nss_size;
- int nss;
-
- he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY,
- ies->data, ies->len);
-
- if (!he_cap_elem)
- return false;
+ struct ieee80211_sta_ht_cap sta_ht_cap = sband->ht_cap;
+ bool is_5ghz = sband->band == NL80211_BAND_5GHZ;
+ bool is_6ghz = sband->band == NL80211_BAND_6GHZ;
+ const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
+ struct ieee80211_sta_vht_cap vht_cap;
- /* invalid HE IE */
- if (he_cap_elem->datalen < 1 + sizeof(*he_cap)) {
- sdata_info(sdata,
- "Invalid HE elem, Disable HE\n");
- return false;
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ conn->mode = IEEE80211_CONN_MODE_S1G;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_dbg(sdata, "operating as S1G STA\n");
+ return;
}
- /* skip one byte ext_tag_id */
- he_cap = (void *)(he_cap_elem->data + 1);
- mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap);
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
- /* invalid HE IE */
- if (he_cap_elem->datalen < 1 + sizeof(*he_cap) + mcs_nss_size) {
- sdata_info(sdata,
- "Invalid HE elem with nss size, Disable HE\n");
- return false;
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
+ if (req && req->flags & ASSOC_REQ_DISABLE_HT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HT disabled by flag, limiting to legacy\n");
+ goto out;
}
- /* mcs_nss is right after he_cap info */
- he_mcs_nss_supp = (void *)(he_cap + 1);
+ if (!wmm_used) {
+ mlme_link_id_dbg(sdata, link_id,
+ "WMM/QoS not supported, limiting to legacy\n");
+ goto out;
+ }
- mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
- mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80);
+ if (req) {
+ unsigned int i;
- /* P802.11-REVme/D0.3
- * 27.1.1 Introduction to the HE PHY
- * ...
- * An HE STA shall support the following features:
- * ...
- * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all
- * supported channel widths for HE SU PPDUs
- */
- if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) {
- sdata_info(sdata,
- "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n",
- mcs_80_map_tx, mcs_80_map_rx);
- return false;
+ for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) {
+ if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
+ req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
+ req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
+ netdev_info(sdata->dev,
+ "WEP/TKIP use, limiting to legacy\n");
+ goto out;
+ }
+ }
}
- if (!he_op)
- return true;
+ if (!sta_ht_cap.ht_supported && !is_6ghz) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HT not supported (and not on 6 GHz), limiting to legacy\n");
+ goto out;
+ }
- ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+ /* HT is fine */
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ IEEE80211_CONN_BW_LIMIT_40 :
+ IEEE80211_CONN_BW_LIMIT_20;
- /*
- * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
- * zeroes, which is nonsense, and completely inconsistent with itself
- * (it doesn't have 8 streams). Accept the settings in this case anyway.
- */
- if (!ap_min_req_set)
- return true;
-
- /* make sure the AP is consistent with itself
- *
- * P802.11-REVme/D0.3
- * 26.17.1 Basic HE BSS operation
- *
- * A STA that is operating in an HE BSS shall be able to receive and
- * transmit at each of the <HE-MCS, NSS> tuple values indicated by the
- * Basic HE-MCS And NSS Set field of the HE Operation parameter of the
- * MLME-START.request primitive and shall be able to receive at each of
- * the <HE-MCS, NSS> tuple values indicated by the Supported HE-MCS and
- * NSS Set field in the HE Capabilities parameter of the MLMESTART.request
- * primitive
- */
- for (nss = 8; nss > 0; nss--) {
- u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
- u8 ap_rx_val;
- u8 ap_tx_val;
+ memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
+ ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);
- if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
- continue;
+ if (req && req->flags & ASSOC_REQ_DISABLE_VHT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "VHT disabled by flag, limiting to HT\n");
+ goto out;
+ }
- ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3;
- ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3;
+ if (vht_cap.vht_supported && is_5ghz) {
+ bool have_80mhz = false;
+ unsigned int i;
- if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) {
- sdata_info(sdata,
- "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n",
- nss, ap_rx_val, ap_rx_val, ap_op_val);
- return false;
+ if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40 MHz support on 5 GHz, limiting to HT\n");
+ goto out;
}
- }
- return true;
-}
+ /* Allow VHT if at least one channel on the sband supports 80 MHz */
+ for (i = 0; i < sband->n_channels; i++) {
+ if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+ IEEE80211_CHAN_NO_80MHZ))
+ continue;
-static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_supported_band *sband,
- const struct ieee80211_he_operation *he_op)
-{
- const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- u16 ap_min_req_set;
- int i;
+ have_80mhz = true;
+ break;
+ }
- if (!sta_he_cap || !he_op)
- return false;
+ if (!have_80mhz) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no 80 MHz channel support on 5 GHz, limiting to HT\n");
+ goto out;
+ }
+ } else if (is_5ghz) { /* !vht_supported but on 5 GHz */
+ mlme_link_id_dbg(sdata, link_id,
+ "no VHT support on 5 GHz, limiting to HT\n");
+ goto out;
+ }
- ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+ /* VHT - if we have - is fine, including 80 MHz, check 160 below again */
+ if (sband->band != NL80211_BAND_2GHZ) {
+ conn->mode = IEEE80211_CONN_MODE_VHT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160;
+ }
- /*
- * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
- * zeroes, which is nonsense, and completely inconsistent with itself
- * (it doesn't have 8 streams). Accept the settings in this case anyway.
- */
- if (!ap_min_req_set)
- return true;
+ if (is_5ghz &&
+ !(vht_cap.cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ |
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))) {
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
+ mlme_link_id_dbg(sdata, link_id,
+ "no VHT 160 MHz capability on 5 GHz, limiting to 80 MHz");
+ }
- /* Need to go over for 80MHz, 160MHz and for 80+80 */
- for (i = 0; i < 3; i++) {
- const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
- &sta_he_cap->he_mcs_nss_supp;
- u16 sta_mcs_map_rx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
- u16 sta_mcs_map_tx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
- u8 nss;
- bool verified = true;
+ if (req && req->flags & ASSOC_REQ_DISABLE_HE) {
+ mlme_link_id_dbg(sdata, link_id,
+ "HE disabled by flag, limiting to HT/VHT\n");
+ goto out;
+ }
- /*
- * For each band there is a maximum of 8 spatial streams
- * possible. Each of the sta_mcs_map_* is a 16-bit struct built
- * of 2 bits per NSS (1-8), with the values defined in enum
- * ieee80211_he_mcs_support. Need to make sure STA TX and RX
- * capabilities aren't less than the AP's minimum requirements
- * for this HE BSS per SS.
- * It is enough to find one such band that meets the reqs.
- */
- for (nss = 8; nss > 0; nss--) {
- u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
- u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
- u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ if (!he_cap) {
+ WARN_ON(is_6ghz);
+ mlme_link_id_dbg(sdata, link_id,
+ "no HE support, limiting to HT/VHT\n");
+ goto out;
+ }
- if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
- continue;
+ /* so we have HE */
+ conn->mode = IEEE80211_CONN_MODE_HE;
- /*
- * Make sure the HE AP doesn't require MCSs that aren't
- * supported by the client as required by spec
- *
- * P802.11-REVme/D0.3
- * 26.17.1 Basic HE BSS operation
- *
- * An HE STA shall not attempt to join * (MLME-JOIN.request primitive)
- * a BSS, unless it supports (i.e., is able to both transmit and
- * receive using) all of the <HE-MCS, NSS> tuples in the basic
- * HE-MCS and NSS set.
- */
- if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
- verified = false;
- break;
- }
+ /* check bandwidth */
+ switch (sband->band) {
+ default:
+ case NL80211_BAND_2GHZ:
+ if (he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
+ break;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40 MHz HE cap in 2.4 GHz, limiting to 20 MHz\n");
+ break;
+ case NL80211_BAND_5GHZ:
+ if (!(he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)) {
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ mlme_link_id_dbg(sdata, link_id,
+ "no 40/80 MHz HE cap in 5 GHz, limiting to 20 MHz\n");
+ break;
+ }
+ if (!(he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)) {
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_80);
+ mlme_link_id_dbg(sdata, link_id,
+ "no 160 MHz HE cap in 5 GHz, limiting to 80 MHz\n");
}
+ break;
+ case NL80211_BAND_6GHZ:
+ if (he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)
+ break;
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit,
+ IEEE80211_CONN_BW_LIMIT_80);
+ mlme_link_id_dbg(sdata, link_id,
+ "no 160 MHz HE cap in 6 GHz, limiting to 80 MHz\n");
+ break;
+ }
- if (verified)
- return true;
+ if (req && req->flags & ASSOC_REQ_DISABLE_EHT) {
+ mlme_link_id_dbg(sdata, link_id,
+ "EHT disabled by flag, limiting to HE\n");
+ goto out;
}
- /* If here, STA doesn't meet AP's HE min requirements */
- return false;
-}
+ eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ if (!eht_cap) {
+ mlme_link_id_dbg(sdata, link_id,
+ "no EHT support, limiting to HE\n");
+ goto out;
+ }
-static u8
-ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap,
- const struct ieee80211_sta_eht_cap *sta_eht_cap,
- unsigned int idx, int bw)
-{
- u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0];
- u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0];
+ /* we have EHT */
- /* handle us being a 20 MHz-only EHT STA - with four values
- * for MCS 0-7, 8-9, 10-11, 12-13.
- */
- if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL))
- return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx];
+ conn->mode = IEEE80211_CONN_MODE_EHT;
- /* the others have MCS 0-9 together, rather than separately from 0-7 */
- if (idx > 0)
- idx--;
+ /* check bandwidth */
+ if (is_6ghz &&
+ eht_cap->eht_cap_elem.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_320;
+ else if (is_6ghz)
+ mlme_link_id_dbg(sdata, link_id,
+ "no EHT 320 MHz cap in 6 GHz, limiting to 160 MHz\n");
- switch (bw) {
- case 0:
- return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx];
- case 1:
- if (!(he_phy_cap0 &
- (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)))
- return 0xff; /* pass check */
- return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx];
- case 2:
- if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ))
- return 0xff; /* pass check */
- return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx];
- }
-
- WARN_ON(1);
- return 0;
+out:
+ mlme_link_id_dbg(sdata, link_id,
+ "determined local STA to be %s, BW limited to %d MHz\n",
+ ieee80211_conn_mode_str(conn->mode),
+ 20 * (1 << conn->bw_limit));
}
-static bool
-ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_supported_band *sband,
- const struct ieee80211_eht_operation *eht_op)
+static void
+ieee80211_determine_our_sta_mode_auth(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_auth_request *req,
+ bool wmm_used,
+ struct ieee80211_conn_settings *conn)
{
- const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- const struct ieee80211_sta_eht_cap *sta_eht_cap =
- ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
- const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req;
- unsigned int i;
-
- if (!sta_he_cap || !sta_eht_cap || !eht_op)
- return false;
-
- req = &eht_op->basic_mcs_nss;
-
- for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) {
- u8 req_rx_nss, req_tx_nss;
- unsigned int bw;
-
- req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i],
- IEEE80211_EHT_MCS_NSS_RX);
- req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i],
- IEEE80211_EHT_MCS_NSS_TX);
+ ieee80211_determine_our_sta_mode(sdata, sband, NULL, wmm_used,
+ req->link_id > 0 ? req->link_id : 0,
+ conn);
+}
- for (bw = 0; bw < 3; bw++) {
- u8 have, have_rx_nss, have_tx_nss;
+static void
+ieee80211_determine_our_sta_mode_assoc(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ struct cfg80211_assoc_request *req,
+ bool wmm_used, int link_id,
+ struct ieee80211_conn_settings *conn)
+{
+ struct ieee80211_conn_settings tmp;
- have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap,
- sta_eht_cap,
- i, bw);
- have_rx_nss = u8_get_bits(have,
- IEEE80211_EHT_MCS_NSS_RX);
- have_tx_nss = u8_get_bits(have,
- IEEE80211_EHT_MCS_NSS_TX);
+ WARN_ON(!req);
- if (req_rx_nss > have_rx_nss ||
- req_tx_nss > have_tx_nss)
- return false;
- }
- }
+ ieee80211_determine_our_sta_mode(sdata, sband, req, wmm_used, link_id,
+ &tmp);
- return true;
+ conn->mode = min_t(enum ieee80211_conn_mode,
+ conn->mode, tmp.mode);
+ conn->bw_limit = min_t(enum ieee80211_conn_bw_limit,
+ conn->bw_limit, tmp.bw_limit);
}
static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link,
- struct cfg80211_bss *cbss,
- bool mlo,
- ieee80211_conn_flags_t *conn_flags)
+ int link_id,
+ struct cfg80211_bss *cbss, bool mlo,
+ struct ieee80211_conn_settings *conn)
{
struct ieee80211_local *local = sdata->local;
- const struct ieee80211_ht_cap *ht_cap = NULL;
- const struct ieee80211_ht_operation *ht_oper = NULL;
- const struct ieee80211_vht_operation *vht_oper = NULL;
- const struct ieee80211_he_operation *he_oper = NULL;
- const struct ieee80211_eht_operation *eht_oper = NULL;
- const struct ieee80211_s1g_oper_ie *s1g_oper = NULL;
- struct ieee80211_supported_band *sband;
- struct cfg80211_chan_def chandef;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
- bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
- bool supports_mlo = false;
- struct ieee80211_bss *bss = (void *)cbss->priv;
- struct ieee80211_elems_parse_params parse_params = {
- .link_id = -1,
- .from_ap = true,
- };
+ struct ieee80211_chan_req chanreq = {};
struct ieee802_11_elems *elems;
- const struct cfg80211_bss_ies *ies;
int ret;
u32 i;
- bool have_80mhz;
lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
+ elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id,
+ &chanreq);
- ies = rcu_dereference(cbss->ies);
- parse_params.start = ies->data;
- parse_params.len = ies->len;
- elems = ieee802_11_parse_elems_full(&parse_params);
- if (!elems) {
+ if (IS_ERR(elems)) {
rcu_read_unlock();
- return -ENOMEM;
- }
-
- sband = local->hw.wiphy->bands[cbss->channel->band];
-
- *conn_flags &= ~(IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ);
-
- /* disable HT/VHT/HE if we don't support them */
- if (!sband->ht_cap.ht_supported && !is_6ghz) {
- mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE/EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!sband->vht_cap.vht_supported && is_5ghz) {
- mlme_dbg(sdata, "VHT not supported, disabling VHT/HE/EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ return PTR_ERR(elems);
}
- if (!ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) {
- mlme_dbg(sdata, "HE not supported, disabling HE and EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) {
- mlme_dbg(sdata, "EHT not supported, disabling EHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_HT) && !is_6ghz) {
- ht_oper = elems->ht_operation;
- ht_cap = elems->ht_cap_elem;
-
- if (!ht_cap) {
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- ht_oper = NULL;
- }
- }
-
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_VHT) && !is_6ghz) {
- vht_oper = elems->vht_operation;
- if (vht_oper && !ht_oper) {
- vht_oper = NULL;
- sdata_info(sdata,
- "AP advertised VHT without HT, disabling HT/VHT/HE\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_HT;
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- *conn_flags |= IEEE80211_CONN_DISABLE_HE;
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
-
- if (!elems->vht_cap_elem) {
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- vht_oper = NULL;
- }
+ if (mlo && !elems->ml_basic) {
+ sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n");
+ rcu_read_unlock();
+ kfree(elems);
+ return -EINVAL;
}
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_HE)) {
- he_oper = elems->he_operation;
+ if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) {
+ struct ieee80211_bss_conf *bss_conf;
+ u8 j = 0;
- if (link && is_6ghz) {
- struct ieee80211_bss_conf *bss_conf;
- u8 j = 0;
+ bss_conf = link->conf;
- bss_conf = link->conf;
+ if (elems->pwr_constr_elem)
+ bss_conf->pwr_reduction = *elems->pwr_constr_elem;
- if (elems->pwr_constr_elem)
- bss_conf->pwr_reduction = *elems->pwr_constr_elem;
+ BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
+ ARRAY_SIZE(elems->tx_pwr_env));
- BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
- ARRAY_SIZE(elems->tx_pwr_env));
-
- for (i = 0; i < elems->tx_pwr_env_num; i++) {
- if (elems->tx_pwr_env_len[i] >
- sizeof(bss_conf->tx_pwr_env[j]))
- continue;
+ for (i = 0; i < elems->tx_pwr_env_num; i++) {
+ if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j]))
+ continue;
- bss_conf->tx_pwr_env_num++;
- memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
- elems->tx_pwr_env_len[i]);
- j++;
- }
+ bss_conf->tx_pwr_env_num++;
+ memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
+ elems->tx_pwr_env_len[i]);
+ j++;
}
-
- if (!ieee80211_verify_peer_he_mcs_support(sdata, ies, he_oper) ||
- !ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
- *conn_flags |= IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
}
-
- /*
- * EHT requires HE to be supported as well. Specifically for 6 GHz
- * channels, the operation channel information can only be deduced from
- * both the 6 GHz operation information (from the HE operation IE) and
- * EHT operation.
- */
- if (!(*conn_flags &
- (IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT)) &&
- he_oper) {
- const struct cfg80211_bss_ies *cbss_ies;
- const struct element *eht_ml_elem;
- const u8 *eht_oper_ie;
-
- cbss_ies = rcu_dereference(cbss->ies);
- eht_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_EHT_OPERATION,
- cbss_ies->data, cbss_ies->len);
- if (eht_oper_ie && eht_oper_ie[1] >=
- 1 + sizeof(struct ieee80211_eht_operation))
- eht_oper = (void *)(eht_oper_ie + 3);
- else
- eht_oper = NULL;
-
- if (!ieee80211_verify_sta_eht_mcs_support(sdata, sband, eht_oper))
- *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
-
- eht_ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
- cbss_ies->data, cbss_ies->len);
-
- /* data + 1 / datalen - 1 since it's an extended element */
- if (!(*conn_flags & IEEE80211_CONN_DISABLE_EHT) &&
- eht_ml_elem &&
- ieee80211_mle_type_ok(eht_ml_elem->data + 1,
- IEEE80211_ML_CONTROL_TYPE_BASIC,
- eht_ml_elem->datalen - 1)) {
- supports_mlo = true;
-
- sdata->vif.cfg.eml_cap =
- ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1);
- sdata->vif.cfg.eml_med_sync_delay =
- ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1);
- }
- }
-
- /* Allow VHT if at least one channel on the sband supports 80 MHz */
- have_80mhz = false;
- for (i = 0; i < sband->n_channels; i++) {
- if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_NO_80MHZ))
- continue;
-
- have_80mhz = true;
- break;
- }
-
- if (!have_80mhz) {
- sdata_info(sdata, "80 MHz not supported, disabling VHT\n");
- *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- }
-
- if (sband->band == NL80211_BAND_S1GHZ) {
- s1g_oper = elems->s1g_oper;
- if (!s1g_oper)
- sdata_info(sdata,
- "AP missing S1G operation element?\n");
- }
-
- *conn_flags |=
- ieee80211_determine_chantype(sdata, link, *conn_flags,
- sband,
- cbss->channel,
- bss->vht_cap_info,
- ht_oper, vht_oper,
- he_oper, eht_oper,
- s1g_oper,
- &chandef, false);
-
- if (link)
- link->needed_rx_chains =
- min(ieee80211_max_rx_chains(link, cbss),
- local->rx_chains);
-
rcu_read_unlock();
/* the element data was RCU protected so no longer valid anyway */
kfree(elems);
elems = NULL;
- if (*conn_flags & IEEE80211_CONN_DISABLE_HE && is_6ghz) {
- sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
- return -EINVAL;
- }
-
- if (mlo && !supports_mlo) {
- sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n");
- return -EINVAL;
- }
-
if (!link)
return 0;
+ rcu_read_lock();
+ link->needed_rx_chains = min(ieee80211_max_rx_chains(link, cbss),
+ local->rx_chains);
+ rcu_read_unlock();
+
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
* same control channel) try to use a smaller bandwidth.
*/
- ret = ieee80211_link_use_channel(link, &chandef,
+ ret = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
/* don't downgrade for 5 and 10 MHz channels, though. */
- if (chandef.width == NL80211_CHAN_WIDTH_5 ||
- chandef.width == NL80211_CHAN_WIDTH_10)
- goto out;
+ if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 ||
+ chanreq.oper.width == NL80211_CHAN_WIDTH_10)
+ return ret;
+
+ while (ret && chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT) {
+ ieee80211_chanreq_downgrade(&chanreq, conn);
- while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
- *conn_flags |=
- ieee80211_chandef_downgrade(&chandef);
- ret = ieee80211_link_use_channel(link, &chandef,
+ ret = ieee80211_link_use_channel(link, &chanreq,
IEEE80211_CHANCTX_SHARED);
}
- out:
+
return ret;
}
@@ -5126,6 +5175,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sta))
goto out_err;
+ sta->sta.spp_amsdu = assoc_data->spp_amsdu;
+
if (ieee80211_vif_is_mld(&sdata->vif)) {
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
if (!assoc_data->link[link_id].bss)
@@ -5189,8 +5240,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
link->conf->dtim_period = link->u.mgd.dtim_period ?: 1;
if (link_id != assoc_data->assoc_link_id) {
- err = ieee80211_prep_channel(sdata, link, cbss, true,
- &link->u.mgd.conn_flags);
+ link->u.mgd.conn = assoc_data->link[link_id].conn;
+
+ err = ieee80211_prep_channel(sdata, link, link_id, cbss,
+ true, &link->u.mgd.conn);
if (err) {
link_info(link, "prep_channel failed\n");
goto out_err;
@@ -5308,6 +5361,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
if (!assoc_data)
return;
+ parse_params.mode =
+ assoc_data->link[assoc_data->assoc_link_id].conn.mode;
+
if (!ether_addr_equal(assoc_data->ap_addr, mgmt->bssid) ||
!ether_addr_equal(assoc_data->ap_addr, mgmt->sa))
return;
@@ -5424,6 +5480,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->ap_addr);
goto abandon_assoc;
}
+
+ sdata->vif.cfg.eml_cap =
+ ieee80211_mle_get_eml_cap((const void *)elems->ml_basic);
+ sdata->vif.cfg.eml_med_sync_delay =
+ ieee80211_mle_get_eml_med_sync_delay((const void *)elems->ml_basic);
+ sdata->vif.cfg.mld_capa_op =
+ ieee80211_mle_get_mld_capa_op((const void *)elems->ml_basic);
}
sdata->vif.cfg.aid = aid;
@@ -5686,49 +5749,6 @@ static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
}
-static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
- const struct ieee80211_eht_operation *eht_oper,
- u64 *changed)
-{
- struct ieee80211_local *local = link->sdata->local;
- u16 bitmap = 0, extracted;
-
- if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
- (eht_oper->params &
- IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
- const struct ieee80211_eht_operation_info *info =
- (void *)eht_oper->optional;
- const u8 *disable_subchannel_bitmap = info->optional;
-
- bitmap = get_unaligned_le16(disable_subchannel_bitmap);
- }
-
- extracted = ieee80211_extract_dis_subch_bmap(eht_oper,
- &link->conf->chandef,
- bitmap);
-
- /* accept if there are no changes */
- if (!(*changed & BSS_CHANGED_BANDWIDTH) &&
- extracted == link->conf->eht_puncturing)
- return true;
-
- if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef)) {
- link_info(link,
- "Got an invalid disable subchannel bitmap from AP %pM: bitmap = 0x%x, bw = 0x%x. disconnect\n",
- link->u.mgd.bssid,
- bitmap,
- link->conf->chandef.width);
- return false;
- }
-
- if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))
- return false;
-
- ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed);
- return true;
-}
-
static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
@@ -5792,9 +5812,7 @@ out:
static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems)
{
- const struct ieee80211_multi_link_elem *ml;
const struct element *sub;
- ssize_t ml_len;
unsigned long removed_links = 0;
u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {};
u8 link_id;
@@ -5803,24 +5821,11 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf)
return;
- ml_len = cfg80211_defragment_element(elems->ml_reconf_elem,
- elems->ie_start,
- elems->total_len,
- elems->scratch_pos,
- elems->scratch + elems->scratch_len -
- elems->scratch_pos,
- WLAN_EID_FRAGMENT);
- if (ml_len < 0)
- return;
-
- elems->ml_reconf = (const void *)elems->scratch_pos;
- elems->ml_reconf_len = ml_len;
- ml = elems->ml_reconf;
-
/* Directly parse the sub elements as the common information doesn't
* hold any useful information.
*/
- for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
+ for_each_mle_subelement(sub, (const u8 *)elems->ml_reconf,
+ elems->ml_reconf_len) {
struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
u8 *pos = prof->variable;
u16 control;
@@ -5891,6 +5896,64 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
TU_TO_JIFFIES(delay));
}
+static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
+ u16 active_links, u16 dormant_links,
+ u16 suspended_links)
+{
+ u64 changed = 0;
+ int ret;
+
+ if (!active_links) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* If there is an active negotiated TTLM, it should be discarded by
+ * the new negotiated/advertised TTLM.
+ */
+ if (sdata->vif.neg_ttlm.valid) {
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
+ sdata->vif.suspended_links = 0;
+ changed = BSS_CHANGED_MLD_TTLM;
+ }
+
+ if (sdata->vif.active_links != active_links) {
+ /* usable links are affected when active_links are changed,
+ * so notify the driver about the status change
+ */
+ changed |= BSS_CHANGED_MLD_VALID_LINKS;
+ active_links &= sdata->vif.active_links;
+ if (!active_links)
+ active_links =
+ BIT(__ffs(sdata->vif.valid_links &
+ ~dormant_links));
+ ret = ieee80211_set_active_links(&sdata->vif, active_links);
+ if (ret) {
+ sdata_info(sdata, "Failed to set TTLM active links\n");
+ goto out;
+ }
+ }
+
+ ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
+ dormant_links);
+ if (ret) {
+ sdata_info(sdata, "Failed to set TTLM dormant links\n");
+ goto out;
+ }
+
+ sdata->vif.suspended_links = suspended_links;
+ if (sdata->vif.suspended_links)
+ changed |= BSS_CHANGED_MLD_TTLM;
+
+ ieee80211_vif_cfg_change_notify(sdata, changed);
+
+out:
+ if (ret)
+ ieee80211_disconnect(&sdata->vif, false);
+
+ return ret;
+}
+
static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
@@ -5898,30 +5961,19 @@ static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
u.mgd.ttlm_work.work);
- int ret;
new_active_links = sdata->u.mgd.ttlm_info.map &
sdata->vif.valid_links;
new_dormant_links = ~sdata->u.mgd.ttlm_info.map &
sdata->vif.valid_links;
- if (!new_active_links) {
- ieee80211_disconnect(&sdata->vif, false);
- return;
- }
ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0);
- new_active_links = BIT(ffs(new_active_links) - 1);
- ieee80211_set_active_links(&sdata->vif, new_active_links);
-
- ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
- new_dormant_links);
+ if (ieee80211_ttlm_set_links(sdata, new_active_links, new_dormant_links,
+ 0))
+ return;
sdata->u.mgd.ttlm_info.active = true;
sdata->u.mgd.ttlm_info.switch_time = 0;
-
- if (!ret)
- ieee80211_vif_cfg_change_notify(sdata,
- BSS_CHANGED_MLD_VALID_LINKS);
}
static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data)
@@ -6131,6 +6183,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
u8 *bssid, *variable = mgmt->u.beacon.variable;
u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
struct ieee80211_elems_parse_params parse_params = {
+ .mode = link->u.mgd.conn.mode,
.link_id = -1,
.from_ap = true,
};
@@ -6184,7 +6237,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
link->u.mgd.dtim_period = elems->dtim_period;
link->u.mgd.have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
- if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
+ !ieee80211_is_s1g_beacon(hdr->frame_control)) {
link->conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
link->conf->sync_device_ts =
@@ -6213,7 +6267,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if (!ifmgd->associated ||
- !ieee80211_rx_our_beacon(bssid, link->u.mgd.bss))
+ !ieee80211_rx_our_beacon(bssid, link->conf->bss))
return;
bssid = link->u.mgd.bssid;
@@ -6240,7 +6294,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
*/
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
- parse_params.bss = link->u.mgd.bss;
+ parse_params.bss = link->conf->bss;
parse_params.filter = care_about_ies;
parse_params.crc = ncrc;
elems = ieee802_11_parse_elems_full(&parse_params);
@@ -6302,9 +6356,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
}
- if (link->u.mgd.csa_waiting_bcn)
- ieee80211_chswitch_post_beacon(link);
-
/*
* Update beacon timing and dtim count on every beacon appearance. This
* will allow the driver to use the most updated values. Do it before
@@ -6378,21 +6429,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
goto free;
}
- if (WARN_ON(!link->conf->chandef.chan))
+ if (WARN_ON(!link->conf->chanreq.oper.chan))
goto free;
- sband = local->hw.wiphy->bands[link->conf->chandef.chan->band];
+ sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band];
changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems);
- if (ieee80211_config_bw(link, elems->ht_cap_elem,
- elems->vht_cap_elem, elems->ht_operation,
- elems->vht_operation, elems->he_operation,
- elems->eht_operation,
- elems->s1g_oper, bssid, &changed)) {
- sdata_info(sdata,
- "failed to follow AP %pM bandwidth change, disconnect\n",
- bssid);
+ if (ieee80211_config_bw(link, elems, true, &changed)) {
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
true, deauth_buf);
@@ -6414,21 +6458,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
elems->pwr_constr_elem,
elems->cisco_dtpc_elem);
- if (elems->eht_operation &&
- !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
- if (!ieee80211_config_puncturing(link, elems->eht_operation,
- &changed)) {
- ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
- WLAN_REASON_DEAUTH_LEAVING,
- true, deauth_buf);
- ieee80211_report_disconnect(sdata, deauth_buf,
- sizeof(deauth_buf), true,
- WLAN_REASON_DEAUTH_LEAVING,
- false);
- goto free;
- }
- }
-
ieee80211_ml_reconfiguration(sdata, elems);
ieee80211_process_adv_ttlm(sdata, elems,
le64_to_cpu(mgmt->u.beacon.timestamp));
@@ -6438,6 +6467,430 @@ free:
kfree(elems);
}
+static void ieee80211_apply_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm neg_ttlm)
+{
+ u16 new_active_links, new_dormant_links, new_suspended_links, map = 0;
+ u8 i;
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++)
+ map |= neg_ttlm.downlink[i] | neg_ttlm.uplink[i];
+
+ /* If there is an active TTLM, unset previously suspended links */
+ if (sdata->vif.neg_ttlm.valid)
+ sdata->vif.dormant_links &= ~sdata->vif.suspended_links;
+
+ /* exclude links that are already disabled by advertised TTLM */
+ new_active_links =
+ map & sdata->vif.valid_links & ~sdata->vif.dormant_links;
+ new_suspended_links =
+ (~map & sdata->vif.valid_links) & ~sdata->vif.dormant_links;
+ new_dormant_links = sdata->vif.dormant_links | new_suspended_links;
+ if (ieee80211_ttlm_set_links(sdata, new_active_links,
+ new_dormant_links, new_suspended_links))
+ return;
+
+ sdata->vif.neg_ttlm = neg_ttlm;
+ sdata->vif.neg_ttlm.valid = true;
+}
+
+static void ieee80211_neg_ttlm_timeout_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.neg_ttlm_timeout_work.work);
+
+ sdata_info(sdata,
+ "No negotiated TTLM response from AP, disconnecting.\n");
+
+ __ieee80211_disconnect(sdata);
+}
+
+static void
+ieee80211_neg_ttlm_add_suggested_map(struct sk_buff *skb,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ u8 i, direction[IEEE80211_TTLM_MAX_CNT];
+
+ if (memcmp(neg_ttlm->downlink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->downlink))) {
+ direction[0] = IEEE80211_TTLM_DIRECTION_DOWN;
+ direction[1] = IEEE80211_TTLM_DIRECTION_UP;
+ } else {
+ direction[0] = IEEE80211_TTLM_DIRECTION_BOTH;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(direction); i++) {
+ u8 tid, len, map_ind = 0, *len_pos, *map_ind_pos, *pos;
+ __le16 map;
+
+ len = sizeof(struct ieee80211_ttlm_elem) + 1 + 1;
+
+ pos = skb_put(skb, len + 2);
+ *pos++ = WLAN_EID_EXTENSION;
+ len_pos = pos++;
+ *pos++ = WLAN_EID_EXT_TID_TO_LINK_MAPPING;
+ *pos++ = direction[i];
+ map_ind_pos = pos++;
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ map = direction[i] == IEEE80211_TTLM_DIRECTION_UP ?
+ cpu_to_le16(neg_ttlm->uplink[tid]) :
+ cpu_to_le16(neg_ttlm->downlink[tid]);
+ if (!map)
+ continue;
+
+ len += 2;
+ map_ind |= BIT(tid);
+ skb_put_data(skb, &map, sizeof(map));
+ }
+
+ *map_ind_pos = map_ind;
+ *len_pos = len;
+
+ if (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH)
+ break;
+ }
+}
+
+static void
+ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm *neg_ttlm,
+ u8 dialog_token)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req);
+ int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
+ 2 * 2 * IEEE80211_TTLM_NUM_TIDS;
+
+ skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->tx_headroom);
+ mgmt = skb_put_zero(skb, hdr_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_req.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_REQ;
+ mgmt->u.action.u.ttlm_req.dialog_token = dialog_token;
+ ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
+ ieee80211_tx_skb(sdata, skb);
+}
+
+int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ttlm_params *params)
+{
+ struct ieee80211_neg_ttlm neg_ttlm = {};
+ u8 i;
+
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ !(sdata->vif.cfg.mld_capa_op &
+ IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP))
+ return -EINVAL;
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if ((params->dlink[i] & ~sdata->vif.valid_links) ||
+ (params->ulink[i] & ~sdata->vif.valid_links))
+ return -EINVAL;
+
+ neg_ttlm.downlink[i] = params->dlink[i];
+ neg_ttlm.uplink[i] = params->ulink[i];
+ }
+
+ if (drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm) !=
+ NEG_TTLM_RES_ACCEPT)
+ return -EINVAL;
+
+ ieee80211_apply_neg_ttlm(sdata, neg_ttlm);
+ sdata->u.mgd.dialog_token_alloc++;
+ ieee80211_send_neg_ttlm_req(sdata, &sdata->vif.neg_ttlm,
+ sdata->u.mgd.dialog_token_alloc);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work,
+ IEEE80211_NEG_TTLM_REQ_TIMEOUT);
+ return 0;
+}
+
+static void
+ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_neg_ttlm_res ttlm_res,
+ u8 dialog_token,
+ struct ieee80211_neg_ttlm *neg_ttlm)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res);
+ int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
+ 2 * 2 * IEEE80211_TTLM_NUM_TIDS;
+
+ skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->tx_headroom);
+ mgmt = skb_put_zero(skb, hdr_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_res.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_RES;
+ mgmt->u.action.u.ttlm_res.dialog_token = dialog_token;
+ switch (ttlm_res) {
+ default:
+ WARN_ON(1);
+ fallthrough;
+ case NEG_TTLM_RES_REJECT:
+ mgmt->u.action.u.ttlm_res.status_code =
+ WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING;
+ break;
+ case NEG_TTLM_RES_ACCEPT:
+ mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS;
+ break;
+ case NEG_TTLM_RES_SUGGEST_PREFERRED:
+ mgmt->u.action.u.ttlm_res.status_code =
+ WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED;
+ ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
+ break;
+ }
+
+ ieee80211_tx_skb(sdata, skb);
+}
+
+static int
+ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_ttlm_elem *ttlm,
+ struct ieee80211_neg_ttlm *neg_ttlm,
+ u8 *direction)
+{
+ u8 control, link_map_presence, map_size, tid;
+ u8 *pos;
+
+ /* The element size was already validated in
+ * ieee80211_tid_to_link_map_size_ok()
+ */
+ pos = (void *)ttlm->optional;
+
+ control = ttlm->control;
+
+ /* mapping switch time and expected duration fields are not expected
+ * in case of negotiated TTLM
+ */
+ if (control & (IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT |
+ IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT)) {
+ mlme_dbg(sdata,
+ "Invalid TTLM element in negotiated TTLM request\n");
+ return -EINVAL;
+ }
+
+ if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) {
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ neg_ttlm->downlink[tid] = sdata->vif.valid_links;
+ neg_ttlm->uplink[tid] = sdata->vif.valid_links;
+ }
+ *direction = IEEE80211_TTLM_DIRECTION_BOTH;
+ return 0;
+ }
+
+ *direction = u8_get_bits(control, IEEE80211_TTLM_CONTROL_DIRECTION);
+ if (*direction != IEEE80211_TTLM_DIRECTION_DOWN &&
+ *direction != IEEE80211_TTLM_DIRECTION_UP &&
+ *direction != IEEE80211_TTLM_DIRECTION_BOTH)
+ return -EINVAL;
+
+ link_map_presence = *pos;
+ pos++;
+
+ if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE)
+ map_size = 1;
+ else
+ map_size = 2;
+
+ for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
+ u16 map;
+
+ if (link_map_presence & BIT(tid)) {
+ map = ieee80211_get_ttlm(map_size, pos);
+ if (!map) {
+ mlme_dbg(sdata,
+ "No active links for TID %d", tid);
+ return -EINVAL;
+ }
+ } else {
+ map = 0;
+ }
+
+ switch (*direction) {
+ case IEEE80211_TTLM_DIRECTION_BOTH:
+ neg_ttlm->downlink[tid] = map;
+ neg_ttlm->uplink[tid] = map;
+ break;
+ case IEEE80211_TTLM_DIRECTION_DOWN:
+ neg_ttlm->downlink[tid] = map;
+ break;
+ case IEEE80211_TTLM_DIRECTION_UP:
+ neg_ttlm->uplink[tid] = map;
+ break;
+ default:
+ return -EINVAL;
+ }
+ pos += map_size;
+ }
+ return 0;
+}
+
+void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+{
+ u8 dialog_token, direction[IEEE80211_TTLM_MAX_CNT] = {}, i;
+ size_t ies_len;
+ enum ieee80211_neg_ttlm_res ttlm_res = NEG_TTLM_RES_ACCEPT;
+ struct ieee802_11_elems *elems = NULL;
+ struct ieee80211_neg_ttlm neg_ttlm = {};
+
+ BUILD_BUG_ON(ARRAY_SIZE(direction) != ARRAY_SIZE(elems->ttlm));
+
+ if (!ieee80211_vif_is_mld(&sdata->vif))
+ return;
+
+ dialog_token = mgmt->u.action.u.ttlm_req.dialog_token;
+ ies_len = len - offsetof(struct ieee80211_mgmt,
+ u.action.u.ttlm_req.variable);
+ elems = ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable,
+ ies_len, true, NULL);
+ if (!elems) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+
+ for (i = 0; i < elems->ttlm_num; i++) {
+ if (ieee80211_parse_neg_ttlm(sdata, elems->ttlm[i],
+ &neg_ttlm, &direction[i]) ||
+ (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH &&
+ elems->ttlm_num != 1)) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+ }
+
+ if (!elems->ttlm_num ||
+ (elems->ttlm_num == 2 && direction[0] == direction[1])) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+
+ for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) {
+ if ((neg_ttlm.downlink[i] &&
+ (neg_ttlm.downlink[i] & ~sdata->vif.valid_links)) ||
+ (neg_ttlm.uplink[i] &&
+ (neg_ttlm.uplink[i] & ~sdata->vif.valid_links))) {
+ ttlm_res = NEG_TTLM_RES_REJECT;
+ goto out;
+ }
+ }
+
+ ttlm_res = drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm);
+
+ if (ttlm_res != NEG_TTLM_RES_ACCEPT)
+ goto out;
+
+ ieee80211_apply_neg_ttlm(sdata, neg_ttlm);
+out:
+ kfree(elems);
+ ieee80211_send_neg_ttlm_res(sdata, ttlm_res, dialog_token, &neg_ttlm);
+}
+
+void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+{
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ mgmt->u.action.u.ttlm_req.dialog_token !=
+ sdata->u.mgd.dialog_token_alloc)
+ return;
+
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.neg_ttlm_timeout_work);
+
+ /* MLD station sends a TID to link mapping request, mainly to handle
+ * BTM (BSS transition management) request, in which case it needs to
+ * restrict the active links set.
+ * In this case it's not expected that the MLD AP will reject the
+ * negotiated TTLM request.
+ * This can be better implemented in the future, to handle request
+ * rejections.
+ */
+ if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS)
+ __ieee80211_disconnect(sdata);
+}
+
+static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ u16 new_dormant_links;
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.teardown_ttlm_work);
+
+ if (!sdata->vif.neg_ttlm.valid)
+ return;
+
+ memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm));
+ new_dormant_links =
+ sdata->vif.dormant_links & ~sdata->vif.suspended_links;
+ sdata->vif.suspended_links = 0;
+ ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
+ new_dormant_links);
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_TTLM |
+ BSS_CHANGED_MLD_VALID_LINKS);
+}
+
+void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ int frame_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.ttlm_tear_down);
+ struct ieee80211_tx_info *info;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+ mgmt = skb_put_zero(skb, frame_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT;
+ mgmt->u.action.u.ttlm_tear_down.action_code =
+ WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN;
+
+ info = IEEE80211_SKB_CB(skb);
+ info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+ info->status_data = IEEE80211_STATUS_TYPE_NEG_TTLM;
+ ieee80211_tx_skb(sdata, skb);
+}
+EXPORT_SYMBOL(ieee80211_send_teardown_neg_ttlm);
+
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
@@ -7067,6 +7520,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ieee80211_sta_handle_tspec_ac_params_wk);
wiphy_delayed_work_init(&ifmgd->ttlm_work,
ieee80211_tid_to_link_map_work);
+ wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work,
+ ieee80211_neg_ttlm_timeout_work);
+ wiphy_work_init(&ifmgd->teardown_ttlm_work,
+ ieee80211_teardown_ttlm_work);
ifmgd->flags = 0;
ifmgd->powersave = sdata->wdev.ps;
@@ -7076,6 +7533,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
spin_lock_init(&ifmgd->teardown_lock);
ifmgd->teardown_skb = NULL;
ifmgd->orig_teardown_skb = NULL;
+ ifmgd->mcast_seq_last = IEEE80211_SN_MODULO;
}
static void ieee80211_recalc_smps_work(struct wiphy *wiphy,
@@ -7095,7 +7553,6 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
unsigned int link_id = link->link_id;
link->u.mgd.p2p_noa_index = -1;
- link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
link->smps_mode = IEEE80211_SMPS_OFF;
@@ -7135,6 +7592,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss, s8 link_id,
const u8 *ap_mld_addr, bool assoc,
+ struct ieee80211_conn_settings *conn,
bool override)
{
struct ieee80211_local *local = sdata->local;
@@ -7266,13 +7724,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
}
if (new_sta || override) {
- err = ieee80211_prep_channel(sdata, link, cbss, mlo,
- &link->u.mgd.conn_flags);
+ /*
+ * Only set this if we're also going to calculate the AP
+ * settings etc., otherwise this was set before in a
+ * previous call. Note override is set to %true in assoc
+ * if the settings were changed.
+ */
+ link->u.mgd.conn = *conn;
+ err = ieee80211_prep_channel(sdata, link, link->link_id, cbss,
+ mlo, &link->u.mgd.conn);
if (err) {
if (new_sta)
sta_info_free(local, new_sta);
goto out_err;
}
+ /* pass out for use in assoc */
+ *conn = link->u.mgd.conn;
}
if (new_sta) {
@@ -7294,7 +7761,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
err);
- goto out_err;
+ goto out_release_chan;
}
} else
WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid));
@@ -7305,8 +7772,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
return 0;
+out_release_chan:
+ ieee80211_link_release_channel(link);
out_err:
- ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
return err;
}
@@ -7387,10 +7855,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
+ struct ieee80211_conn_settings conn;
struct ieee80211_link_data *link;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_bss *bss;
u16 auth_alg;
int err;
- bool cont_auth;
+ bool cont_auth, wmm_used;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -7521,8 +7992,17 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
/* needed for transmitting the auth frame(s) properly */
memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN);
+ bss = (void *)req->bss->priv;
+ wmm_used = bss->wmm_used && (local->hw.queues >= IEEE80211_NUM_ACS);
+
+ sband = local->hw.wiphy->bands[req->bss->channel->band];
+
+ ieee80211_determine_our_sta_mode_auth(sdata, sband, req, wmm_used,
+ &conn);
+
err = ieee80211_prep_connection(sdata, req->bss, req->link_id,
- req->ap_mld_addr, cont_auth, false);
+ req->ap_mld_addr, cont_auth,
+ &conn, false);
if (err)
goto err_clear;
@@ -7561,38 +8041,33 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
return err;
}
-static ieee80211_conn_flags_t
+static void
ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgd_assoc_data *assoc_data,
struct cfg80211_assoc_request *req,
- ieee80211_conn_flags_t conn_flags,
+ struct ieee80211_conn_settings *conn,
unsigned int link_id)
{
struct ieee80211_local *local = sdata->local;
const struct cfg80211_bss_ies *bss_ies;
struct ieee80211_supported_band *sband;
- const struct element *ht_elem, *vht_elem;
struct ieee80211_link_data *link;
struct cfg80211_bss *cbss;
struct ieee80211_bss *bss;
- bool is_5ghz, is_6ghz;
cbss = assoc_data->link[link_id].bss;
if (WARN_ON(!cbss))
- return 0;
+ return;
bss = (void *)cbss->priv;
sband = local->hw.wiphy->bands[cbss->channel->band];
if (WARN_ON(!sband))
- return 0;
+ return;
link = sdata_dereference(sdata->link[link_id], sdata);
if (WARN_ON(!link))
- return 0;
-
- is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
- is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+ return;
/* for MLO connections assume advertising all rates is OK */
if (!req->ap_mld_addr) {
@@ -7609,40 +8084,18 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
assoc_data->ie_pos += req->links[link_id].elems_len;
}
- rcu_read_lock();
- ht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION);
- if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation))
- assoc_data->link[link_id].ap_ht_param =
- ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param;
- else if (!is_6ghz)
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- vht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
- if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) {
- memcpy(&assoc_data->link[link_id].ap_vht_cap, vht_elem->data,
- sizeof(struct ieee80211_vht_cap));
- } else if (is_5ghz) {
- link_info(link,
- "VHT capa missing/short, disabling VHT/HE/EHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_VHT |
- IEEE80211_CONN_DISABLE_HE |
- IEEE80211_CONN_DISABLE_EHT;
- }
- rcu_read_unlock();
-
link->u.mgd.beacon_crc_valid = false;
link->u.mgd.dtim_period = 0;
link->u.mgd.have_beacon = false;
- /* override HT/VHT configuration only if the AP and we support it */
- if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ /* override HT configuration only if the AP and we support it */
+ if (conn->mode >= IEEE80211_CONN_MODE_HT) {
struct ieee80211_sta_ht_cap sta_ht_cap;
memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
}
- link->conf->eht_puncturing = 0;
-
rcu_read_lock();
bss_ies = rcu_dereference(cbss->beacon_ies);
if (bss_ies) {
@@ -7663,7 +8116,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
}
if (bss_ies) {
- const struct ieee80211_eht_operation *eht_oper;
const struct element *elem;
elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
@@ -7680,32 +8132,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
link->conf->ema_ap = true;
else
link->conf->ema_ap = false;
-
- elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION,
- bss_ies->data, bss_ies->len);
- eht_oper = (const void *)(elem->data + 1);
-
- if (elem &&
- ieee80211_eht_oper_size_ok((const void *)(elem->data + 1),
- elem->datalen - 1) &&
- (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
- (eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
- const struct ieee80211_eht_operation_info *info =
- (void *)eht_oper->optional;
- const u8 *disable_subchannel_bitmap = info->optional;
- u16 bitmap;
-
- bitmap = get_unaligned_le16(disable_subchannel_bitmap);
- if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef) &&
- !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)))
- ieee80211_handle_puncturing_bitmap(link,
- eht_oper,
- bitmap,
- NULL);
- else
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
}
rcu_read_unlock();
@@ -7732,8 +8158,67 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
} else {
link->smps_mode = link->u.mgd.req_smps;
}
+}
+
+static int
+ieee80211_mgd_get_ap_ht_vht_capa(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgd_assoc_data *assoc_data,
+ int link_id)
+{
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ enum nl80211_band band = cbss->channel->band;
+ struct ieee80211_supported_band *sband;
+ const struct element *elem;
+ int err;
+
+ /* neither HT nor VHT elements used on 6 GHz */
+ if (band == NL80211_BAND_6GHZ)
+ return 0;
+
+ if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_HT)
+ return 0;
+
+ rcu_read_lock();
+ elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION);
+ if (!elem || elem->datalen < sizeof(struct ieee80211_ht_operation)) {
+ mlme_link_id_dbg(sdata, link_id, "no HT operation on BSS %pM\n",
+ cbss->bssid);
+ err = -EINVAL;
+ goto out_rcu;
+ }
+ assoc_data->link[link_id].ap_ht_param =
+ ((struct ieee80211_ht_operation *)(elem->data))->ht_param;
+ rcu_read_unlock();
- return conn_flags;
+ if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_VHT)
+ return 0;
+
+ /* some drivers want to support VHT on 2.4 GHz even */
+ sband = sdata->local->hw.wiphy->bands[band];
+ if (!sband->vht_cap.vht_supported)
+ return 0;
+
+ rcu_read_lock();
+ elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
+ /* but even then accept it not being present on the AP */
+ if (!elem && band == NL80211_BAND_2GHZ) {
+ err = 0;
+ goto out_rcu;
+ }
+ if (!elem || elem->datalen < sizeof(struct ieee80211_vht_cap)) {
+ mlme_link_id_dbg(sdata, link_id, "no VHT capa on BSS %pM\n",
+ cbss->bssid);
+ err = -EINVAL;
+ goto out_rcu;
+ }
+ memcpy(&assoc_data->link[link_id].ap_vht_cap, elem->data,
+ sizeof(struct ieee80211_vht_cap));
+ rcu_read_unlock();
+
+ return 0;
+out_rcu:
+ rcu_read_unlock();
+ return err;
}
int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
@@ -7745,11 +8230,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgd_assoc_data *assoc_data;
const struct element *ssid_elem;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
- ieee80211_conn_flags_t conn_flags = 0;
struct ieee80211_link_data *link;
struct cfg80211_bss *cbss;
- struct ieee80211_bss *bss;
- bool override;
+ bool override, uapsd_supported;
+ bool match_auth;
int i, err;
size_t size = sizeof(*assoc_data) + req->ie_len;
@@ -7768,44 +8252,26 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
sdata_info(sdata, "AP is in CSA process, reject assoc\n");
- kfree(assoc_data);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
rcu_read_lock();
ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
- kfree(assoc_data);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen);
assoc_data->ssid_len = ssid_elem->datalen;
- memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
- vif_cfg->ssid_len = assoc_data->ssid_len;
rcu_read_unlock();
- if (req->ap_mld_addr) {
- for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
- if (!req->links[i].bss)
- continue;
- link = sdata_dereference(sdata->link[i], sdata);
- if (link)
- ether_addr_copy(assoc_data->link[i].addr,
- link->conf->addr);
- else
- eth_random_addr(assoc_data->link[i].addr);
- }
- } else {
- memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN);
- }
-
- assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
-
- memcpy(assoc_data->ap_addr,
- req->ap_mld_addr ?: req->bss->bssid,
- ETH_ALEN);
+ if (req->ap_mld_addr)
+ memcpy(assoc_data->ap_addr, req->ap_mld_addr, ETH_ALEN);
+ else
+ memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN);
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -7823,98 +8289,148 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
false);
}
- if (ifmgd->auth_data && !ifmgd->auth_data->done) {
- err = -EBUSY;
- goto err_free;
- }
+ memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
+ memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
+ sizeof(ifmgd->ht_capa_mask));
- if (ifmgd->assoc_data) {
- err = -EBUSY;
- goto err_free;
- }
+ memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
+ memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
+ sizeof(ifmgd->vht_capa_mask));
- if (ifmgd->auth_data) {
- bool match;
+ memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa));
+ memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask,
+ sizeof(ifmgd->s1g_capa_mask));
- /* keep sta info, bssid if matching */
- match = ether_addr_equal(ifmgd->auth_data->ap_addr,
- assoc_data->ap_addr) &&
- ifmgd->auth_data->link_id == req->link_id;
+ /* keep some setup (AP STA, channel, ...) if matching */
+ match_auth = ifmgd->auth_data &&
+ ether_addr_equal(ifmgd->auth_data->ap_addr,
+ assoc_data->ap_addr) &&
+ ifmgd->auth_data->link_id == req->link_id;
- /* Cleanup is delayed if auth_data matches */
- if (!match)
- ieee80211_destroy_auth_data(sdata, false);
- }
+ if (req->ap_mld_addr) {
+ uapsd_supported = true;
- /* prepare assoc data */
+ if (req->flags & (ASSOC_REQ_DISABLE_HT |
+ ASSOC_REQ_DISABLE_VHT |
+ ASSOC_REQ_DISABLE_HE |
+ ASSOC_REQ_DISABLE_EHT)) {
+ err = -EINVAL;
+ goto err_free;
+ }
- bss = (void *)cbss->priv;
- assoc_data->wmm = bss->wmm_used &&
- (local->hw.queues >= IEEE80211_NUM_ACS);
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_bss *link_cbss = req->links[i].bss;
+ struct ieee80211_bss *bss;
- /*
- * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode.
- * We still associate in non-HT mode (11a/b/g) if any one of these
- * ciphers is configured as pairwise.
- * We can set this to true for non-11n hardware, that'll be checked
- * separately along with the peer capabilities.
- */
- for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) {
- if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
- req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
- req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- netdev_info(sdata->dev,
- "disabling HT/VHT/HE due to WEP/TKIP use\n");
+ if (!link_cbss)
+ continue;
+
+ bss = (void *)link_cbss->priv;
+
+ if (!bss->wmm_used) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
+
+ if (link_cbss->channel->band == NL80211_BAND_S1GHZ) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
+
+ link = sdata_dereference(sdata->link[i], sdata);
+ if (link)
+ ether_addr_copy(assoc_data->link[i].addr,
+ link->conf->addr);
+ else
+ eth_random_addr(assoc_data->link[i].addr);
+ sband = local->hw.wiphy->bands[link_cbss->channel->band];
+
+ if (match_auth && i == assoc_link_id && link)
+ assoc_data->link[i].conn = link->u.mgd.conn;
+ else
+ assoc_data->link[i].conn =
+ ieee80211_conn_settings_unlimited;
+ ieee80211_determine_our_sta_mode_assoc(sdata, sband,
+ req, true, i,
+ &assoc_data->link[i].conn);
+ assoc_data->link[i].bss = link_cbss;
+ assoc_data->link[i].disabled = req->links[i].disabled;
+
+ if (!bss->uapsd_supported)
+ uapsd_supported = false;
+
+ if (assoc_data->link[i].conn.mode < IEEE80211_CONN_MODE_EHT) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
+
+ err = ieee80211_mgd_get_ap_ht_vht_capa(sdata,
+ assoc_data, i);
+ if (err) {
+ err = -EINVAL;
+ req->links[i].error = err;
+ goto err_free;
+ }
}
- }
- /* also disable HT/VHT/HE/EHT if the AP doesn't use WMM */
- if (!bss->wmm_used) {
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- netdev_info(sdata->dev,
- "disabling HT/VHT/HE as WMM/QoS is not supported by the AP\n");
- }
+ assoc_data->wmm = true;
+ } else {
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
- if (req->flags & ASSOC_REQ_DISABLE_HT) {
- mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n");
- conn_flags |= IEEE80211_CONN_DISABLE_HT;
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
- }
+ memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN);
+ assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
- if (req->flags & ASSOC_REQ_DISABLE_VHT) {
- mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_VHT;
- }
+ assoc_data->wmm = bss->wmm_used &&
+ (local->hw.queues >= IEEE80211_NUM_ACS);
+
+ if (cbss->channel->band == NL80211_BAND_6GHZ &&
+ req->flags & (ASSOC_REQ_DISABLE_HT |
+ ASSOC_REQ_DISABLE_VHT |
+ ASSOC_REQ_DISABLE_HE)) {
+ err = -EINVAL;
+ goto err_free;
+ }
- if (req->flags & ASSOC_REQ_DISABLE_HE) {
- mlme_dbg(sdata, "HE disabled by flag, disabling HE/EHT\n");
- conn_flags |= IEEE80211_CONN_DISABLE_HE;
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ assoc_data->link[0].bss = cbss;
+
+ if (match_auth)
+ assoc_data->link[0].conn = sdata->deflink.u.mgd.conn;
+ else
+ assoc_data->link[0].conn =
+ ieee80211_conn_settings_unlimited;
+ ieee80211_determine_our_sta_mode_assoc(sdata, sband, req,
+ assoc_data->wmm, 0,
+ &assoc_data->link[0].conn);
+
+ uapsd_supported = bss->uapsd_supported;
+
+ err = ieee80211_mgd_get_ap_ht_vht_capa(sdata, assoc_data, 0);
+ if (err)
+ goto err_free;
}
- if (req->flags & ASSOC_REQ_DISABLE_EHT)
- conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU;
- memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
- memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
- sizeof(ifmgd->ht_capa_mask));
+ if (ifmgd->auth_data && !ifmgd->auth_data->done) {
+ err = -EBUSY;
+ goto err_free;
+ }
- memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
- memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
- sizeof(ifmgd->vht_capa_mask));
+ if (ifmgd->assoc_data) {
+ err = -EBUSY;
+ goto err_free;
+ }
- memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa));
- memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask,
- sizeof(ifmgd->s1g_capa_mask));
+ /* Cleanup is delayed if auth_data matches */
+ if (ifmgd->auth_data && !match_auth)
+ ieee80211_destroy_auth_data(sdata, false);
if (req->ie && req->ie_len) {
memcpy(assoc_data->ie, req->ie, req->ie_len);
@@ -7946,19 +8462,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->assoc_link_id = assoc_link_id;
if (req->ap_mld_addr) {
- for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) {
- assoc_data->link[i].conn_flags = conn_flags;
- assoc_data->link[i].bss = req->links[i].bss;
- assoc_data->link[i].disabled = req->links[i].disabled;
- }
-
/* if there was no authentication, set up the link */
err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0);
if (err)
goto err_clear;
- } else {
- assoc_data->link[0].conn_flags = conn_flags;
- assoc_data->link[0].bss = cbss;
}
link = sdata_dereference(sdata->link[assoc_link_id], sdata);
@@ -7967,19 +8474,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
goto err_clear;
}
- /* keep old conn_flags from ieee80211_prep_channel() from auth */
- conn_flags |= link->u.mgd.conn_flags;
- conn_flags |= ieee80211_setup_assoc_link(sdata, assoc_data, req,
- conn_flags, assoc_link_id);
- override = link->u.mgd.conn_flags != conn_flags;
- link->u.mgd.conn_flags |= conn_flags;
+ override = link->u.mgd.conn.mode !=
+ assoc_data->link[assoc_link_id].conn.mode ||
+ link->u.mgd.conn.bw_limit !=
+ assoc_data->link[assoc_link_id].conn.bw_limit;
+ link->u.mgd.conn = assoc_data->link[assoc_link_id].conn;
+
+ ieee80211_setup_assoc_link(sdata, assoc_data, req, &link->u.mgd.conn,
+ assoc_link_id);
if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK),
"U-APSD not supported with HW_PS_NULLFUNC_STACK\n"))
sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD;
- if (bss->wmm_used && bss->uapsd_supported &&
+ if (assoc_data->wmm && uapsd_supported &&
(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) {
assoc_data->uapsd = true;
ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
@@ -8023,27 +8532,29 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
continue;
if (i == assoc_data->assoc_link_id)
continue;
- /* only calculate the flags, hence link == NULL */
- err = ieee80211_prep_channel(sdata, NULL,
+ /* only calculate the mode, hence link == NULL */
+ err = ieee80211_prep_channel(sdata, NULL, i,
assoc_data->link[i].bss, true,
- &assoc_data->link[i].conn_flags);
+ &assoc_data->link[i].conn);
if (err) {
req->links[i].error = err;
goto err_clear;
}
}
+ memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
+ vif_cfg->ssid_len = assoc_data->ssid_len;
+
/* needed for transmitting the assoc frames properly */
memcpy(sdata->vif.cfg.ap_addr, assoc_data->ap_addr, ETH_ALEN);
err = ieee80211_prep_connection(sdata, cbss, req->link_id,
- req->ap_mld_addr, true, override);
+ req->ap_mld_addr, true,
+ &assoc_data->link[assoc_link_id].conn,
+ override);
if (err)
goto err_clear;
- assoc_data->link[assoc_data->assoc_link_id].conn_flags =
- link->u.mgd.conn_flags;
-
if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC)) {
const struct cfg80211_bss_ies *beacon_ies;
@@ -8204,9 +8715,9 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
&ifmgd->csa_connection_drop_work);
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&ifmgd->tdls_peer_del_work);
- wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
- &ifmgd->ml_reconf_work);
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->neg_ttlm_timeout_work);
if (ifmgd->assoc_data)
ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 449af4e1cc..9ef14e475c 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -168,6 +168,7 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
struct ocb_setup *setup)
{
+ struct ieee80211_chan_req chanreq = { .oper = setup->chandef };
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
@@ -182,7 +183,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
return err;
@@ -207,7 +208,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
ifocb->joined = false;
- sta_info_flush(sdata);
+ sta_info_flush(sdata, -1);
spin_lock_bh(&ifocb->incomplete_lock);
while (!list_empty(&ifocb->incomplete_stations)) {
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 6c40802025..65e1e9e971 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -86,7 +86,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
- if (WARN_ON(local->use_chanctx))
+ if (WARN_ON(!local->emulate_chanctx))
return;
/*
@@ -136,7 +136,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
- if (WARN_ON(local->use_chanctx))
+ if (WARN_ON(!local->emulate_chanctx))
return;
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -351,10 +351,13 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
* 20 MHz channel width) don't stop all the operations but still
* treat it as though the ROC operation started properly, so
* other ROC operations won't interfere with this one.
+ *
+ * Note: scan can't run, tmp_channel is what we use, so this
+ * must be the currently active channel.
*/
- roc->on_channel = roc->chan == local->_oper_chandef.chan &&
- local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 &&
- local->_oper_chandef.width != NL80211_CHAN_WIDTH_10;
+ roc->on_channel = roc->chan == local->hw.conf.chandef.chan &&
+ local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_5 &&
+ local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_10;
/* start this ROC */
ieee80211_recalc_idle(local);
@@ -363,7 +366,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
ieee80211_offchannel_stop_vifs(local);
local->tmp_channel = roc->chan;
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
}
wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work,
@@ -426,7 +429,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
return;
if (!roc->started) {
- WARN_ON(local->use_chanctx);
+ WARN_ON(!local->emulate_chanctx);
_ieee80211_start_next_roc(local);
} else {
on_channel = roc->on_channel;
@@ -439,7 +442,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
ieee80211_flush_queues(local, NULL, false);
local->tmp_channel = NULL;
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
ieee80211_offchannel_return(local);
}
@@ -539,7 +542,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/* this may work, but is untested */
return -EOPNOTSUPP;
- if (local->use_chanctx && !local->ops->remain_on_channel)
+ if (!local->emulate_chanctx && !local->ops->remain_on_channel)
return -EOPNOTSUPP;
roc = kzalloc(sizeof(*roc), GFP_KERNEL);
@@ -894,8 +897,18 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
break;
}
- if (ether_addr_equal(conf->addr, mgmt->sa))
+ if (ether_addr_equal(conf->addr, mgmt->sa)) {
+ /* If userspace requested Tx on a specific link
+ * use the same link id if the link bss is matching
+ * the requested chan.
+ */
+ if (sdata->vif.valid_links &&
+ params->link_id >= 0 && params->link_id == i &&
+ params->chan == chanctx_conf->def.chan)
+ link_id = i;
+
break;
+ }
chanctx_conf = NULL;
}
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
new file mode 100644
index 0000000000..055a60e909
--- /dev/null
+++ b/net/mac80211/parse.c
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2024 Intel Corporation
+ *
+ * element parsing for mac80211
+ */
+
+#include <net/mac80211.h>
+#include <linux/netdevice.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/bitmap.h>
+#include <linux/crc32.h>
+#include <net/net_namespace.h>
+#include <net/cfg80211.h>
+#include <net/rtnetlink.h>
+#include <kunit/visibility.h>
+
+#include "ieee80211_i.h"
+#include "driver-ops.h"
+#include "rate.h"
+#include "mesh.h"
+#include "wme.h"
+#include "led.h"
+#include "wep.h"
+
+struct ieee80211_elems_parse {
+ /* must be first for kfree to work */
+ struct ieee802_11_elems elems;
+
+ /* The basic Multi-Link element in the original elements */
+ const struct element *ml_basic_elem;
+
+ /* The reconfiguration Multi-Link element in the original elements */
+ const struct element *ml_reconf_elem;
+
+ /*
+ * scratch buffer that can be used for various element parsing related
+ * tasks, e.g., element de-fragmentation etc.
+ */
+ size_t scratch_len;
+ u8 *scratch_pos;
+ u8 scratch[] __counted_by(scratch_len);
+};
+
+static void
+ieee80211_parse_extension_element(u32 *crc,
+ const struct element *elem,
+ struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elems_parse_params *params)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const void *data = elem->data + 1;
+ bool calc_crc = false;
+ u8 len;
+
+ if (!elem->datalen)
+ return;
+
+ len = elem->datalen - 1;
+
+ switch (elem->data[0]) {
+ case WLAN_EID_EXT_HE_MU_EDCA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ calc_crc = true;
+ if (len >= sizeof(*elems->mu_edca_param_set))
+ elems->mu_edca_param_set = data;
+ break;
+ case WLAN_EID_EXT_HE_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (ieee80211_he_capa_size_ok(data, len)) {
+ elems->he_cap = data;
+ elems->he_cap_len = len;
+ }
+ break;
+ case WLAN_EID_EXT_HE_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ calc_crc = true;
+ if (len >= sizeof(*elems->he_operation) &&
+ len >= ieee80211_he_oper_size(data) - 1)
+ elems->he_operation = data;
+ break;
+ case WLAN_EID_EXT_UORA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= 1)
+ elems->uora_element = data;
+ break;
+ case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
+ if (len == 3)
+ elems->max_channel_switch_time = data;
+ break;
+ case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
+ if (len >= sizeof(*elems->mbssid_config_ie))
+ elems->mbssid_config_ie = data;
+ break;
+ case WLAN_EID_EXT_HE_SPR:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= sizeof(*elems->he_spr) &&
+ len >= ieee80211_he_spr_size(data) - 1)
+ elems->he_spr = data;
+ break;
+ case WLAN_EID_EXT_HE_6GHZ_CAPA:
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= sizeof(*elems->he_6ghz_capa))
+ elems->he_6ghz_capa = data;
+ break;
+ case WLAN_EID_EXT_EHT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_eht_capa_size_ok(elems->he_cap,
+ data, len,
+ params->from_ap)) {
+ elems->eht_cap = data;
+ elems->eht_cap_len = len;
+ }
+ break;
+ case WLAN_EID_EXT_EHT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_eht_oper_size_ok(data, len))
+ elems->eht_operation = data;
+ calc_crc = true;
+ break;
+ case WLAN_EID_EXT_EHT_MULTI_LINK:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ calc_crc = true;
+
+ if (ieee80211_mle_size_ok(data, len)) {
+ const struct ieee80211_multi_link_elem *mle =
+ (void *)data;
+
+ switch (le16_get_bits(mle->control,
+ IEEE80211_ML_CONTROL_TYPE)) {
+ case IEEE80211_ML_CONTROL_TYPE_BASIC:
+ if (elems_parse->ml_basic_elem) {
+ elems->parse_error |=
+ IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC;
+ break;
+ }
+ elems_parse->ml_basic_elem = elem;
+ break;
+ case IEEE80211_ML_CONTROL_TYPE_RECONF:
+ elems_parse->ml_reconf_elem = elem;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case WLAN_EID_EXT_BANDWIDTH_INDICATION:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ if (ieee80211_bandwidth_indication_size_ok(data, len))
+ elems->bandwidth_indication = data;
+ calc_crc = true;
+ break;
+ case WLAN_EID_EXT_TID_TO_LINK_MAPPING:
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+ calc_crc = true;
+ if (ieee80211_tid_to_link_map_size_ok(data, len) &&
+ elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) {
+ elems->ttlm[elems->ttlm_num] = (void *)data;
+ elems->ttlm_num++;
+ }
+ break;
+ }
+
+ if (crc && calc_crc)
+ *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2);
+}
+
+static u32
+_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
+ struct ieee80211_elems_parse *elems_parse,
+ const struct element *check_inherit)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const struct element *elem;
+ bool calc_crc = params->filter != 0;
+ DECLARE_BITMAP(seen_elems, 256);
+ u32 crc = params->crc;
+
+ bitmap_zero(seen_elems, 256);
+
+ for_each_element(elem, params->start, params->len) {
+ const struct element *subelem;
+ u8 elem_parse_failed;
+ u8 id = elem->id;
+ u8 elen = elem->datalen;
+ const u8 *pos = elem->data;
+
+ if (check_inherit &&
+ !cfg80211_is_element_inherited(elem,
+ check_inherit))
+ continue;
+
+ switch (id) {
+ case WLAN_EID_SSID:
+ case WLAN_EID_SUPP_RATES:
+ case WLAN_EID_FH_PARAMS:
+ case WLAN_EID_DS_PARAMS:
+ case WLAN_EID_CF_PARAMS:
+ case WLAN_EID_TIM:
+ case WLAN_EID_IBSS_PARAMS:
+ case WLAN_EID_CHALLENGE:
+ case WLAN_EID_RSN:
+ case WLAN_EID_ERP_INFO:
+ case WLAN_EID_EXT_SUPP_RATES:
+ case WLAN_EID_HT_CAPABILITY:
+ case WLAN_EID_HT_OPERATION:
+ case WLAN_EID_VHT_CAPABILITY:
+ case WLAN_EID_VHT_OPERATION:
+ case WLAN_EID_MESH_ID:
+ case WLAN_EID_MESH_CONFIG:
+ case WLAN_EID_PEER_MGMT:
+ case WLAN_EID_PREQ:
+ case WLAN_EID_PREP:
+ case WLAN_EID_PERR:
+ case WLAN_EID_RANN:
+ case WLAN_EID_CHANNEL_SWITCH:
+ case WLAN_EID_EXT_CHANSWITCH_ANN:
+ case WLAN_EID_COUNTRY:
+ case WLAN_EID_PWR_CONSTRAINT:
+ case WLAN_EID_TIMEOUT_INTERVAL:
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ case WLAN_EID_CHAN_SWITCH_PARAM:
+ case WLAN_EID_EXT_CAPABILITY:
+ case WLAN_EID_CHAN_SWITCH_TIMING:
+ case WLAN_EID_LINK_ID:
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
+ case WLAN_EID_RSNX:
+ case WLAN_EID_S1G_BCN_COMPAT:
+ case WLAN_EID_S1G_CAPABILITIES:
+ case WLAN_EID_S1G_OPERATION:
+ case WLAN_EID_AID_RESPONSE:
+ case WLAN_EID_S1G_SHORT_BCN_INTERVAL:
+ /*
+ * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
+ * that if the content gets bigger it might be needed more than once
+ */
+ if (test_bit(id, seen_elems)) {
+ elems->parse_error |=
+ IEEE80211_PARSE_ERR_DUP_ELEM;
+ continue;
+ }
+ break;
+ }
+
+ if (calc_crc && id < 64 && (params->filter & (1ULL << id)))
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ elem_parse_failed = 0;
+
+ switch (id) {
+ case WLAN_EID_LINK_ID:
+ if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->lnk_id = (void *)(pos - 2);
+ break;
+ case WLAN_EID_CHAN_SWITCH_TIMING:
+ if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ch_sw_timing = (void *)pos;
+ break;
+ case WLAN_EID_EXT_CAPABILITY:
+ elems->ext_capab = pos;
+ elems->ext_capab_len = elen;
+ break;
+ case WLAN_EID_SSID:
+ elems->ssid = pos;
+ elems->ssid_len = elen;
+ break;
+ case WLAN_EID_SUPP_RATES:
+ elems->supp_rates = pos;
+ elems->supp_rates_len = elen;
+ break;
+ case WLAN_EID_DS_PARAMS:
+ if (elen >= 1)
+ elems->ds_params = pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_TIM:
+ if (elen >= sizeof(struct ieee80211_tim_ie)) {
+ elems->tim = (void *)pos;
+ elems->tim_len = elen;
+ } else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VENDOR_SPECIFIC:
+ if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
+ pos[2] == 0xf2) {
+ /* Microsoft OUI (00:50:F2) */
+
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ if (elen >= 5 && pos[3] == 2) {
+ /* OUI Type 2 - WMM IE */
+ if (pos[4] == 0) {
+ elems->wmm_info = pos;
+ elems->wmm_info_len = elen;
+ } else if (pos[4] == 1) {
+ elems->wmm_param = pos;
+ elems->wmm_param_len = elen;
+ }
+ }
+ }
+ break;
+ case WLAN_EID_RSN:
+ elems->rsn = pos;
+ elems->rsn_len = elen;
+ break;
+ case WLAN_EID_ERP_INFO:
+ if (elen >= 1)
+ elems->erp_info = pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_EXT_SUPP_RATES:
+ elems->ext_supp_rates = pos;
+ elems->ext_supp_rates_len = elen;
+ break;
+ case WLAN_EID_HT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen >= sizeof(struct ieee80211_ht_cap))
+ elems->ht_cap_elem = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_HT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen >= sizeof(struct ieee80211_ht_operation))
+ elems->ht_operation = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VHT_CAPABILITY:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen >= sizeof(struct ieee80211_vht_cap))
+ elems->vht_cap_elem = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_VHT_OPERATION:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen >= sizeof(struct ieee80211_vht_operation)) {
+ elems->vht_operation = (void *)pos;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_OPMODE_NOTIF:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (elen > 0) {
+ elems->opmode_notif = pos;
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+ break;
+ }
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_MESH_ID:
+ elems->mesh_id = pos;
+ elems->mesh_id_len = elen;
+ break;
+ case WLAN_EID_MESH_CONFIG:
+ if (elen >= sizeof(struct ieee80211_meshconf_ie))
+ elems->mesh_config = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_PEER_MGMT:
+ elems->peering = pos;
+ elems->peering_len = elen;
+ break;
+ case WLAN_EID_MESH_AWAKE_WINDOW:
+ if (elen >= 2)
+ elems->awake_window = (void *)pos;
+ break;
+ case WLAN_EID_PREQ:
+ elems->preq = pos;
+ elems->preq_len = elen;
+ break;
+ case WLAN_EID_PREP:
+ elems->prep = pos;
+ elems->prep_len = elen;
+ break;
+ case WLAN_EID_PERR:
+ elems->perr = pos;
+ elems->perr_len = elen;
+ break;
+ case WLAN_EID_RANN:
+ if (elen >= sizeof(struct ieee80211_rann_ie))
+ elems->rann = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_CHANNEL_SWITCH:
+ if (elen != sizeof(struct ieee80211_channel_sw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ch_switch_ie = (void *)pos;
+ break;
+ case WLAN_EID_EXT_CHANSWITCH_ANN:
+ if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->ext_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ if (params->mode < IEEE80211_CONN_MODE_HT)
+ break;
+ if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->sec_chan_offs = (void *)pos;
+ break;
+ case WLAN_EID_CHAN_SWITCH_PARAM:
+ if (elen <
+ sizeof(*elems->mesh_chansw_params_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->mesh_chansw_params_ie = (void *)pos;
+ break;
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+
+ if (!params->action) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM;
+ break;
+ }
+
+ if (elen < sizeof(*elems->wide_bw_chansw_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->wide_bw_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
+ if (params->mode < IEEE80211_CONN_MODE_VHT)
+ break;
+ if (params->action) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_UNEXPECTED_ELEM;
+ break;
+ }
+ /*
+ * This is a bit tricky, but as we only care about
+ * a few elements, parse them out manually.
+ */
+ subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
+ pos, elen);
+ if (subelem) {
+ if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie))
+ elems->wide_bw_chansw_ie =
+ (void *)subelem->data;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ }
+
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ break;
+
+ subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION,
+ pos, elen);
+ if (subelem) {
+ const void *edata = subelem->data + 1;
+ u8 edatalen = subelem->datalen - 1;
+
+ if (ieee80211_bandwidth_indication_size_ok(edata,
+ edatalen))
+ elems->bandwidth_indication = edata;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ }
+ break;
+ case WLAN_EID_COUNTRY:
+ elems->country_elem = pos;
+ elems->country_elem_len = elen;
+ break;
+ case WLAN_EID_PWR_CONSTRAINT:
+ if (elen != 1) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->pwr_constr_elem = pos;
+ break;
+ case WLAN_EID_CISCO_VENDOR_SPECIFIC:
+ /* Lots of different options exist, but we only care
+ * about the Dynamic Transmit Power Control element.
+ * First check for the Cisco OUI, then for the DTPC
+ * tag (0x00).
+ */
+ if (elen < 4) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+
+ if (pos[0] != 0x00 || pos[1] != 0x40 ||
+ pos[2] != 0x96 || pos[3] != 0x00)
+ break;
+
+ if (elen != 6) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ elems->cisco_dtpc_elem = pos;
+ break;
+ case WLAN_EID_ADDBA_EXT:
+ if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ }
+ elems->addba_ext_ie = (void *)pos;
+ break;
+ case WLAN_EID_TIMEOUT_INTERVAL:
+ if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
+ elems->timeout_int = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_BSS_MAX_IDLE_PERIOD:
+ if (elen >= sizeof(*elems->max_idle_period_ie))
+ elems->max_idle_period_ie = (void *)pos;
+ break;
+ case WLAN_EID_RSNX:
+ elems->rsnx = pos;
+ elems->rsnx_len = elen;
+ break;
+ case WLAN_EID_TX_POWER_ENVELOPE:
+ if (elen < 1 ||
+ elen > sizeof(struct ieee80211_tx_pwr_env))
+ break;
+
+ if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
+ break;
+
+ elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
+ elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
+ elems->tx_pwr_env_num++;
+ break;
+ case WLAN_EID_EXTENSION:
+ ieee80211_parse_extension_element(calc_crc ?
+ &crc : NULL,
+ elem, elems_parse,
+ params);
+ break;
+ case WLAN_EID_S1G_CAPABILITIES:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen >= sizeof(*elems->s1g_capab))
+ elems->s1g_capab = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_S1G_OPERATION:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(*elems->s1g_oper))
+ elems->s1g_oper = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_S1G_BCN_COMPAT:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(*elems->s1g_bcn_compat))
+ elems->s1g_bcn_compat = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ case WLAN_EID_AID_RESPONSE:
+ if (params->mode != IEEE80211_CONN_MODE_S1G)
+ break;
+ if (elen == sizeof(struct ieee80211_aid_response_ie))
+ elems->aid_resp = (void *)pos;
+ else
+ elem_parse_failed =
+ IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
+ break;
+ default:
+ break;
+ }
+
+ if (elem_parse_failed)
+ elems->parse_error |= elem_parse_failed;
+ else
+ __set_bit(id, seen_elems);
+ }
+
+ if (!for_each_element_completed(elem, params->start, params->len))
+ elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END;
+
+ return crc;
+}
+
+static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+ struct ieee802_11_elems *elems,
+ struct cfg80211_bss *bss,
+ u8 *nontransmitted_profile)
+{
+ const struct element *elem, *sub;
+ size_t profile_len = 0;
+ bool found = false;
+
+ if (!bss || !bss->transmitted_bss)
+ return profile_len;
+
+ for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
+ if (elem->datalen < 2)
+ continue;
+ if (elem->data[0] < 1 || elem->data[0] > 8)
+ continue;
+
+ for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+ u8 new_bssid[ETH_ALEN];
+ const u8 *index;
+
+ if (sub->id != 0 || sub->datalen < 4) {
+ /* not a valid BSS profile */
+ continue;
+ }
+
+ if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
+ sub->data[1] != 2) {
+ /* The first element of the
+ * Nontransmitted BSSID Profile is not
+ * the Nontransmitted BSSID Capability
+ * element.
+ */
+ continue;
+ }
+
+ memset(nontransmitted_profile, 0, len);
+ profile_len = cfg80211_merge_profile(start, len,
+ elem,
+ sub,
+ nontransmitted_profile,
+ len);
+
+ /* found a Nontransmitted BSSID Profile */
+ index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
+ nontransmitted_profile,
+ profile_len);
+ if (!index || index[1] < 1 || index[2] == 0) {
+ /* Invalid MBSSID Index element */
+ continue;
+ }
+
+ cfg80211_gen_new_bssid(bss->transmitted_bss->bssid,
+ elem->data[0],
+ index[2],
+ new_bssid);
+ if (ether_addr_equal(new_bssid, bss->bssid)) {
+ found = true;
+ elems->bssid_index_len = index[1];
+ elems->bssid_index = (void *)&index[2];
+ break;
+ }
+ }
+ }
+
+ return found ? profile_len : 0;
+}
+
+static void
+ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse,
+ u8 link_id)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ const struct ieee80211_multi_link_elem *ml = elems->ml_basic;
+ ssize_t ml_len = elems->ml_basic_len;
+ const struct element *sub;
+
+ for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
+ struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
+ ssize_t sta_prof_len;
+ u16 control;
+
+ if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE)
+ continue;
+
+ if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data,
+ sub->datalen))
+ return;
+
+ control = le16_to_cpu(prof->control);
+
+ if (link_id != u16_get_bits(control,
+ IEEE80211_MLE_STA_CONTROL_LINK_ID))
+ continue;
+
+ if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE))
+ return;
+
+ /* the sub element can be fragmented */
+ sta_prof_len =
+ cfg80211_defragment_element(sub,
+ (u8 *)ml, ml_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+
+ if (sta_prof_len < 0)
+ return;
+
+ elems->prof = (void *)elems_parse->scratch_pos;
+ elems->sta_prof_len = sta_prof_len;
+ elems_parse->scratch_pos += sta_prof_len;
+
+ return;
+ }
+}
+
+static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elems_parse_params *params)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ struct ieee80211_mle_per_sta_profile *prof;
+ struct ieee80211_elems_parse_params sub = {
+ .mode = params->mode,
+ .action = params->action,
+ .from_ap = params->from_ap,
+ .link_id = -1,
+ };
+ ssize_t ml_len = elems->ml_basic_len;
+ const struct element *non_inherit = NULL;
+ const u8 *end;
+
+ ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem,
+ elems->ie_start,
+ elems->total_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ WLAN_EID_FRAGMENT);
+
+ if (ml_len < 0)
+ return;
+
+ elems->ml_basic = (const void *)elems_parse->scratch_pos;
+ elems->ml_basic_len = ml_len;
+ elems_parse->scratch_pos += ml_len;
+
+ if (params->link_id == -1)
+ return;
+
+ ieee80211_mle_get_sta_prof(elems_parse, params->link_id);
+ prof = elems->prof;
+
+ if (!prof)
+ return;
+
+ /* check if we have the 4 bytes for the fixed part in assoc response */
+ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) {
+ elems->prof = NULL;
+ elems->sta_prof_len = 0;
+ return;
+ }
+
+ /*
+ * Skip the capability information and the status code that are expected
+ * as part of the station profile in association response frames. Note
+ * the -1 is because the 'sta_info_len' is accounted to as part of the
+ * per-STA profile, but not part of the 'u8 variable[]' portion.
+ */
+ sub.start = prof->variable + prof->sta_info_len - 1 + 4;
+ end = (const u8 *)prof + elems->sta_prof_len;
+ sub.len = end - sub.start;
+
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub.start, sub.len);
+ _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit);
+}
+
+static void
+ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ ssize_t ml_len;
+
+ ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem,
+ elems->ie_start,
+ elems->total_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ WLAN_EID_FRAGMENT);
+ if (ml_len < 0)
+ return;
+ elems->ml_reconf = (void *)elems_parse->scratch_pos;
+ elems->ml_reconf_len = ml_len;
+ elems_parse->scratch_pos += ml_len;
+}
+
+struct ieee802_11_elems *
+ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
+{
+ struct ieee80211_elems_parse *elems_parse;
+ struct ieee802_11_elems *elems;
+ const struct element *non_inherit = NULL;
+ u8 *nontransmitted_profile;
+ int nontransmitted_profile_len = 0;
+ size_t scratch_len = 3 * params->len;
+
+ BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0);
+
+ elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len),
+ GFP_ATOMIC);
+ if (!elems_parse)
+ return NULL;
+
+ elems_parse->scratch_len = scratch_len;
+ elems_parse->scratch_pos = elems_parse->scratch;
+
+ elems = &elems_parse->elems;
+ elems->ie_start = params->start;
+ elems->total_len = params->len;
+
+ nontransmitted_profile = elems_parse->scratch_pos;
+ nontransmitted_profile_len =
+ ieee802_11_find_bssid_profile(params->start, params->len,
+ elems, params->bss,
+ nontransmitted_profile);
+ elems_parse->scratch_pos += nontransmitted_profile_len;
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ nontransmitted_profile,
+ nontransmitted_profile_len);
+
+ elems->crc = _ieee802_11_parse_elems_full(params, elems_parse,
+ non_inherit);
+
+ /* Override with nontransmitted profile, if found */
+ if (nontransmitted_profile_len) {
+ struct ieee80211_elems_parse_params sub = {
+ .mode = params->mode,
+ .start = nontransmitted_profile,
+ .len = nontransmitted_profile_len,
+ .action = params->action,
+ .link_id = params->link_id,
+ };
+
+ _ieee802_11_parse_elems_full(&sub, elems_parse, NULL);
+ }
+
+ ieee80211_mle_parse_link(elems_parse, params);
+
+ ieee80211_mle_defrag_reconf(elems_parse);
+
+ if (elems->tim && !elems->parse_error) {
+ const struct ieee80211_tim_ie *tim_ie = elems->tim;
+
+ elems->dtim_period = tim_ie->dtim_period;
+ elems->dtim_count = tim_ie->dtim_count;
+ }
+
+ /* Override DTIM period and count if needed */
+ if (elems->bssid_index &&
+ elems->bssid_index_len >=
+ offsetofend(struct ieee80211_bssid_index, dtim_period))
+ elems->dtim_period = elems->bssid_index->dtim_period;
+
+ if (elems->bssid_index &&
+ elems->bssid_index_len >=
+ offsetofend(struct ieee80211_bssid_index, dtim_count))
+ elems->dtim_count = elems->bssid_index->dtim_count;
+
+ return elems;
+}
+EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full);
+
+int ieee80211_parse_bitrates(enum nl80211_chan_width width,
+ const struct ieee80211_supported_band *sband,
+ const u8 *srates, int srates_len, u32 *rates)
+{
+ u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
+ struct ieee80211_rate *br;
+ int brate, rate, i, j, count = 0;
+
+ *rates = 0;
+
+ for (i = 0; i < srates_len; i++) {
+ rate = srates[i] & 0x7f;
+
+ for (j = 0; j < sband->n_bitrates; j++) {
+ br = &sband->bitrates[j];
+ if ((rate_flags & br->flags) != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(br->bitrate, 5);
+ if (brate == rate) {
+ *rates |= BIT(j);
+ count++;
+ break;
+ }
+ }
+ }
+ return count;
+}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 0efdaa8f2a..4dc1def695 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -4,7 +4,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2019, 2022-2024 Intel Corporation
*/
#include <linux/kernel.h>
@@ -279,10 +279,10 @@ void ieee80211_check_rate_mask(struct ieee80211_link_data *link)
u32 user_mask, basic_rates = link->conf->basic_rates;
enum nl80211_band band;
- if (WARN_ON(!link->conf->chandef.chan))
+ if (WARN_ON(!link->conf->chanreq.oper.chan))
return;
- band = link->conf->chandef.chan->band;
+ band = link->conf->chanreq.oper.chan->band;
if (band == NL80211_BAND_S1GHZ) {
/* TODO */
return;
@@ -762,7 +762,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
u32 i, flags;
*mask = sdata->rc_rateidx_mask[sband->band];
- flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
for (i = 0; i < sband->n_bitrates; i++) {
if ((flags & sband->bitrates[i].flags) != flags)
*mask &= ~BIT(i);
@@ -818,7 +818,7 @@ rate_control_apply_mask_ratetbl(struct sta_info *sta,
mcs_mask, vht_mask))
return;
- chan_width = sta->sdata->vif.bss_conf.chandef.width;
+ chan_width = sta->sdata->vif.bss_conf.chanreq.oper.width;
for (i = 0; i < IEEE80211_TX_RATE_TABLE_SIZE; i++) {
if (rates->rate[i].idx < 0)
break;
@@ -855,7 +855,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
* included in the configured mask and change the rate indexes
* if needed.
*/
- chan_width = sdata->vif.bss_conf.chandef.width;
+ chan_width = sdata->vif.bss_conf.chanreq.oper.width;
for (i = 0; i < max_rates; i++) {
/* Skip invalid rates */
if (rates[i].idx < 0)
@@ -877,6 +877,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
struct ieee80211_sub_if_data *sdata;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_supported_band *sband;
+ u32 mask = ~0;
rate_control_fill_sta_table(sta, info, dest, max_rates);
@@ -889,9 +890,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
+ if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ mask = sdata->rc_rateidx_mask[info->band];
+
if (dest[0].idx < 0)
__rate_control_send_low(&sdata->local->hw, sband, sta, info,
- sdata->rc_rateidx_mask[info->band]);
+ mask);
if (sta)
rate_fixup_ratelist(vif, sband, info, dest, max_rates);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 541b0f53c6..4914692750 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -1251,8 +1251,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- u16 sc = le16_to_cpu(hdr->seq_ctrl);
- u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
+ u16 mpdu_seq_num = ieee80211_get_sn(hdr);
u16 head_seq_num, buf_size;
int index;
bool ret = true;
@@ -1435,13 +1434,31 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (ieee80211_is_ctl(hdr->frame_control) ||
- ieee80211_is_any_nullfunc(hdr->frame_control) ||
- is_multicast_ether_addr(hdr->addr1))
+ ieee80211_is_any_nullfunc(hdr->frame_control))
return RX_CONTINUE;
if (!rx->sta)
return RX_CONTINUE;
+ if (unlikely(is_multicast_ether_addr(hdr->addr1))) {
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ u16 sn = ieee80211_get_sn(hdr);
+
+ if (!ieee80211_is_data_present(hdr->frame_control))
+ return RX_CONTINUE;
+
+ if (!ieee80211_vif_is_mld(&sdata->vif) ||
+ sdata->vif.type != NL80211_IFTYPE_STATION)
+ return RX_CONTINUE;
+
+ if (sdata->u.mgd.mcast_seq_last != IEEE80211_SN_MODULO &&
+ ieee80211_sn_less_eq(sn, sdata->u.mgd.mcast_seq_last))
+ return RX_DROP_U_DUP;
+
+ sdata->u.mgd.mcast_seq_last = sn;
+ return RX_CONTINUE;
+ }
+
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
@@ -3351,7 +3368,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION))
return;
- if (rx->sdata->vif.bss_conf.csa_active)
+ if (rx->link->conf->csa_active)
return;
baselen = mgmt->u.beacon.variable - rx->skb->data;
@@ -3363,7 +3380,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
rx->skb->len - baselen);
if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) &&
ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) {
- struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf;
+ struct ieee80211_bss_conf *bss_conf = rx->link->conf;
const struct ieee80211_he_operation *he_oper;
u8 color;
@@ -3377,7 +3394,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
if (color == bss_conf->he_bss_color.color)
ieee80211_obss_color_collision_notify(&rx->sdata->vif,
BIT_ULL(color),
- GFP_ATOMIC);
+ bss_conf->link_id);
}
}
@@ -3770,6 +3787,32 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
}
break;
+ case WLAN_CATEGORY_PROTECTED_EHT:
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_req.action_code))
+ break;
+
+ switch (mgmt->u.action.u.ttlm_req.action_code) {
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_req))
+ goto invalid;
+ goto queue;
+ case WLAN_PROTECTED_EHT_ACTION_TTLM_RES:
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_res))
+ goto invalid;
+ goto queue;
+ default:
+ break;
+ }
+ break;
}
return RX_CONTINUE;
@@ -3927,8 +3970,8 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
__ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, -1,
status->band);
}
- dev_kfree_skb(rx->skb);
- return RX_QUEUED;
+
+ return RX_DROP_U_UNKNOWN_ACTION_REJECTED;
}
static ieee80211_rx_result debug_noinline
@@ -5199,7 +5242,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
*/
if (!status->link_valid && pubsta->mlo) {
- struct ieee80211_hdr *hdr = (void *)skb->data;
struct link_sta_info *link_sta;
link_sta = link_sta_info_get_bss(rx.sdata,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f9d5842601..b5f2df61c7 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -257,7 +257,6 @@ static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_sub_if_data *sdata1, *sdata2;
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
@@ -281,12 +280,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (skb->len < min_hdr_len)
return;
- sdata1 = rcu_dereference(local->scan_sdata);
- sdata2 = rcu_dereference(local->sched_scan_sdata);
-
- if (likely(!sdata1 && !sdata2))
- return;
-
if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
/*
* we were passive scanning because of radar/no-IR, but
@@ -304,10 +297,17 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
return;
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+ struct ieee80211_sub_if_data *sdata1, *sdata2;
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
u32 scan_req_flags = 0, sched_scan_req_flags = 0;
+ sdata1 = rcu_dereference(local->scan_sdata);
+ sdata2 = rcu_dereference(local->sched_scan_sdata);
+
+ if (likely(!sdata1 && !sdata2))
+ return;
+
scan_req = rcu_dereference(local->scan_req);
sched_scan_req = rcu_dereference(local->sched_scan_req);
@@ -327,8 +327,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
sched_scan_req_flags,
mgmt->da))
return;
+ } else {
+ /* Beacons are expected only with broadcast address */
+ if (!is_broadcast_ether_addr(mgmt->da))
+ return;
}
+ /* Do not update the BSS table in case of only monitor interfaces */
+ if (local->open_count == local->monitors)
+ return;
+
bss = ieee80211_bss_info_update(local, rx_status,
mgmt, skb->len,
channel);
@@ -350,7 +358,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
struct cfg80211_scan_request *req;
struct cfg80211_chan_def chandef;
u8 bands_used = 0;
- int i, ielen, n_chans;
+ int i, ielen;
+ u32 *n_chans;
u32 flags = 0;
req = rcu_dereference_protected(local->scan_req,
@@ -360,34 +369,34 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
return false;
if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
for (i = 0; i < req->n_channels; i++) {
local->hw_scan_req->req.channels[i] = req->channels[i];
bands_used |= BIT(req->channels[i]->band);
}
-
- n_chans = req->n_channels;
} else {
do {
if (local->hw_scan_band == NUM_NL80211_BANDS)
return false;
- n_chans = 0;
+ n_chans = &local->hw_scan_req->req.n_channels;
+ *n_chans = 0;
for (i = 0; i < req->n_channels; i++) {
if (req->channels[i]->band !=
local->hw_scan_band)
continue;
- local->hw_scan_req->req.channels[n_chans] =
+ local->hw_scan_req->req.channels[(*n_chans)++] =
req->channels[i];
- n_chans++;
+
bands_used |= BIT(req->channels[i]->band);
}
local->hw_scan_band++;
- } while (!n_chans);
+ } while (!*n_chans);
}
- local->hw_scan_req->req.n_channels = n_chans;
ieee80211_prepare_scan_chandef(&chandef);
if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
@@ -400,6 +409,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
req->ie, req->ie_len,
bands_used, req->rates, &chandef,
flags);
+ if (ielen < 0)
+ return false;
local->hw_scan_req->req.ie_len = ielen;
local->hw_scan_req->req.no_cck = req->no_cck;
ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr);
@@ -476,7 +487,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
}
/* Set power back to normal operating levels. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
if (!hw_scan && was_scanning) {
ieee80211_configure_filter(local);
@@ -523,7 +534,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
/* Software scan is not supported in multi-channel cases */
- if (local->use_chanctx)
+ if (!local->emulate_chanctx)
return -EOPNOTSUPP;
/*
@@ -553,7 +564,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
ieee80211_configure_filter(local);
/* We need to set power level at maximum rate for scanning. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
@@ -638,6 +649,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
@@ -677,7 +689,10 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
* After sending probe requests, wait for probe responses
* on the channel.
*/
- *next_delay = IEEE80211_CHANNEL_TIME;
+ *next_delay = msecs_to_jiffies(scan_req->duration) >
+ IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME ?
+ msecs_to_jiffies(scan_req->duration) - IEEE80211_PROBE_DELAY :
+ IEEE80211_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
}
@@ -694,19 +709,11 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
return -EBUSY;
/* For an MLO connection, if a link ID was specified, validate that it
- * is indeed active. If no link ID was specified, select one of the
- * active links.
+ * is indeed active.
*/
- if (ieee80211_vif_is_mld(&sdata->vif)) {
- if (req->tsf_report_link_id >= 0) {
- if (!(sdata->vif.active_links &
- BIT(req->tsf_report_link_id)))
- return -EINVAL;
- } else {
- req->tsf_report_link_id =
- __ffs(sdata->vif.active_links);
- }
- }
+ if (ieee80211_vif_is_mld(&sdata->vif) && req->tsf_report_link_id >= 0 &&
+ !(sdata->vif.active_links & BIT(req->tsf_report_link_id)))
+ return -EINVAL;
if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
@@ -738,15 +745,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_ies_bufsize *= n_bands;
}
- local->hw_scan_req = kmalloc(
- sizeof(*local->hw_scan_req) +
- req->n_channels * sizeof(req->channels[0]) +
- local->hw_scan_ies_bufsize, GFP_KERNEL);
+ local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req,
+ req.channels,
+ req->n_channels) +
+ local->hw_scan_ies_bufsize,
+ GFP_KERNEL);
if (!local->hw_scan_req)
return -ENOMEM;
local->hw_scan_req->req.ssids = req->ssids;
local->hw_scan_req->req.n_ssids = req->n_ssids;
+ /* None of the channels are actually set
+ * up but let UBSAN know the boundaries.
+ */
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
ies = (u8 *)local->hw_scan_req +
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]);
@@ -787,7 +800,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (hw_scan) {
__set_bit(SCAN_HW_SCANNING, &local->scanning);
} else if ((req->n_channels == 1) &&
- (req->channels[0] == local->_oper_chandef.chan)) {
+ (req->channels[0] == local->hw.conf.chandef.chan)) {
/*
* If we are scanning only on the operating channel
* then we do not need to stop normal activities
@@ -805,7 +818,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
ieee80211_configure_filter(local); /* accept probe-responses */
/* We need to ensure power level is at max for scanning. */
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_conf_chan(local);
if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
IEEE80211_CHAN_RADAR)) ||
@@ -970,13 +983,13 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
/* If scanning on oper channel, use whatever channel-type
* is currently in use.
*/
- if (chan == local->_oper_chandef.chan)
- local->scan_chandef = local->_oper_chandef;
+ if (chan == local->hw.conf.chandef.chan)
+ local->scan_chandef = local->hw.conf.chandef;
else
local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
set_channel:
- if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
+ if (ieee80211_hw_conf_chan(local))
skip = 1;
/* advance state machine to next channel/band */
@@ -1000,7 +1013,10 @@ set_channel:
*/
if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
!scan_req->n_ssids) {
- *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ *next_delay = msecs_to_jiffies(scan_req->duration) >
+ IEEE80211_PASSIVE_CHANNEL_TIME ?
+ msecs_to_jiffies(scan_req->duration) :
+ IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
if (scan_req->n_ssids)
set_bit(SCAN_BEACON_WAIT, &local->scanning);
@@ -1017,7 +1033,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
{
/* switch back to the operating channel */
local->scan_chandef.chan = NULL;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+ ieee80211_hw_conf_chan(local);
/* disable PS */
ieee80211_offchannel_return(local);
@@ -1316,10 +1332,12 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
ieee80211_prepare_scan_chandef(&chandef);
- ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
- &sched_scan_ies, req->ie,
- req->ie_len, bands_used, rate_masks, &chandef,
- flags);
+ ret = ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
+ &sched_scan_ies, req->ie,
+ req->ie_len, bands_used, rate_masks,
+ &chandef, flags);
+ if (ret < 0)
+ goto error;
ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
if (ret == 0) {
@@ -1327,8 +1345,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
rcu_assign_pointer(local->sched_scan_req, req);
}
+error:
kfree(ie);
-
out:
if (ret) {
/* Clean in case of failure after HW restart or upon resume. */
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 55959b0b24..b2de4c6fb8 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -19,21 +19,222 @@
#include "sta_info.h"
#include "wme.h"
+/*
+ * Fill the width and center frequencies of @chandef from a Wide Bandwidth
+ * Channel Switch element.
+ *
+ * @chandef->chan must already be set by the caller: its band is used to
+ * convert the two center-frequency segment channel numbers of @wbcs_elem
+ * into frequencies. center_freq2 is only written for 80+80; in all other
+ * cases it keeps whatever value the caller put there.
+ *
+ * Return: the result of cfg80211_chandef_valid() on the updated @chandef.
+ */
+static bool
+wbcs_elem_to_chandef(const struct ieee80211_wide_bw_chansw_ie *wbcs_elem,
+ struct cfg80211_chan_def *chandef)
+{
+ u8 ccfs0 = wbcs_elem->new_center_freq_seg0;
+ u8 ccfs1 = wbcs_elem->new_center_freq_seg1;
+ u32 cf0 = ieee80211_channel_to_frequency(ccfs0, chandef->chan->band);
+ u32 cf1 = ieee80211_channel_to_frequency(ccfs1, chandef->chan->band);
+
+ switch (wbcs_elem->new_channel_width) {
+ case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ /* deprecated encoding */
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 = cf0;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ /* deprecated encoding */
+ chandef->width = NL80211_CHAN_WIDTH_80P80;
+ chandef->center_freq1 = cf0;
+ chandef->center_freq2 = cf1;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80MHZ:
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ chandef->center_freq1 = cf0;
+
+ /*
+ * A non-zero second segment widens the 80 MHz encoding: segments
+ * exactly 8 channel numbers apart mean 160 MHz centered on seg1,
+ * segments further apart mean 80+80 with seg1 as the second center.
+ * Segments closer than 8 leave the plain 80 MHz result untouched.
+ */
+ if (ccfs1) {
+ u8 diff = abs(ccfs0 - ccfs1);
+
+ if (diff == 8) {
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 = cf1;
+ } else if (diff > 8) {
+ chandef->width = NL80211_CHAN_WIDTH_80P80;
+ chandef->center_freq2 = cf1;
+ }
+ }
+ break;
+ case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ default:
+ /* If the WBCS Element is present, new channel bandwidth is
+ * at least 40 MHz.
+ */
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ chandef->center_freq1 = cf0;
+ break;
+ }
+
+ return cfg80211_chandef_valid(chandef);
+}
+
+/*
+ * Re-derive @chandef through the HT/VHT operation parsers.
+ *
+ * The current control frequency, center frequencies and width of @chandef
+ * are encoded into locally built HT and VHT operation structures, which
+ * are then handed to ieee80211_chandef_ht_oper() and
+ * ieee80211_chandef_vht_oper() together with @vht_cap_info.
+ *
+ * Failure is reported by setting @chandef->chan to NULL. That happens when
+ * @conn is below HT mode or limited to less than 40 MHz, or when
+ * ieee80211_chandef_vht_oper() returns false. When @conn is below VHT mode
+ * the function returns after the HT step only.
+ */
+static void
+validate_chandef_by_ht_vht_oper(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ u32 vht_cap_info,
+ struct cfg80211_chan_def *chandef)
+{
+ u32 control_freq, center_freq1, center_freq2;
+ enum nl80211_chan_width chan_width;
+ struct ieee80211_ht_operation ht_oper;
+ struct ieee80211_vht_operation vht_oper;
+
+ if (conn->mode < IEEE80211_CONN_MODE_HT ||
+ conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
+ chandef->chan = NULL;
+ return;
+ }
+
+ /* snapshot the input; the helpers below write back into chandef */
+ control_freq = chandef->chan->center_freq;
+ center_freq1 = chandef->center_freq1;
+ center_freq2 = chandef->center_freq2;
+ chan_width = chandef->width;
+
+ ht_oper.primary_chan = ieee80211_frequency_to_channel(control_freq);
+ /* secondary channel sits on the side of center_freq1 relative to control */
+ if (control_freq != center_freq1)
+ ht_oper.ht_param = control_freq > center_freq1 ?
+ IEEE80211_HT_PARAM_CHA_SEC_BELOW :
+ IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+ else
+ ht_oper.ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+
+ /* the return value of the HT step is not checked here */
+ ieee80211_chandef_ht_oper(&ht_oper, chandef);
+
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
+ return;
+
+ vht_oper.center_freq_seg0_idx =
+ ieee80211_frequency_to_channel(center_freq1);
+ vht_oper.center_freq_seg1_idx = center_freq2 ?
+ ieee80211_frequency_to_channel(center_freq2) : 0;
+
+ switch (chan_width) {
+ case NL80211_CHAN_WIDTH_320:
+ /*
+ * NOTE(review): vht_oper.chan_width is not assigned on this path
+ * and vht_oper has no initializer, yet it is still passed to
+ * ieee80211_chandef_vht_oper() below - confirm this is intended.
+ */
+ WARN_ON(1);
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ /*
+ * 160 MHz is expressed as an 80 MHz width with seg1 holding the
+ * 160 MHz center and seg0 moved 8 channel numbers towards the
+ * control channel.
+ */
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ vht_oper.center_freq_seg1_idx = vht_oper.center_freq_seg0_idx;
+ vht_oper.center_freq_seg0_idx +=
+ control_freq < center_freq1 ? -8 : 8;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ break;
+ default:
+ vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT;
+ break;
+ }
+
+ /* the second segment index is also carried in the HT operation mode field */
+ ht_oper.operation_mode =
+ le16_encode_bits(vht_oper.center_freq_seg1_idx,
+ IEEE80211_HT_OP_MODE_CCFS2_MASK);
+
+ if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
+ &vht_oper, &ht_oper, chandef))
+ chandef->chan = NULL;
+}
+
+/*
+ * Re-derive @chandef through the 6 GHz HE/EHT operation parser.
+ *
+ * The current control frequency, center frequencies and width of @chandef
+ * are encoded into locally built HE (with 6 GHz operation info) and, for
+ * EHT-capable connections, EHT operation structures. These are handed to
+ * ieee80211_chandef_he_6ghz_oper().
+ *
+ * Failure is reported by setting @chandef->chan to NULL. That happens when
+ * @conn is below HE mode or when ieee80211_chandef_he_6ghz_oper() returns
+ * false.
+ */
+static void
+validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_conn_settings *conn,
+ struct cfg80211_chan_def *chandef)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 control_freq, center_freq1, center_freq2;
+ enum nl80211_chan_width chan_width;
+ /*
+ * Each wrapper places the optional info structure directly behind its
+ * operation header in one packed object.
+ * NOTE(review): neither object has an initializer and only the fields
+ * assigned below are written; assumes the parser reads nothing else -
+ * TODO confirm.
+ */
+ struct {
+ struct ieee80211_he_operation _oper;
+ struct ieee80211_he_6ghz_oper _6ghz_oper;
+ } __packed he;
+ struct {
+ struct ieee80211_eht_operation _oper;
+ struct ieee80211_eht_operation_info _oper_info;
+ } __packed eht;
+ const struct ieee80211_eht_operation *eht_oper;
+
+ if (conn->mode < IEEE80211_CONN_MODE_HE) {
+ chandef->chan = NULL;
+ return;
+ }
+
+ /* snapshot the input; the helper below writes back into chandef */
+ control_freq = chandef->chan->center_freq;
+ center_freq1 = chandef->center_freq1;
+ center_freq2 = chandef->center_freq2;
+ chan_width = chandef->width;
+
+ he._oper.he_oper_params =
+ le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO);
+ he._6ghz_oper.primary =
+ ieee80211_frequency_to_channel(control_freq);
+ he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1);
+ he._6ghz_oper.ccfs1 = center_freq2 ?
+ ieee80211_frequency_to_channel(center_freq2) : 0;
+
+ switch (chan_width) {
+ case NL80211_CHAN_WIDTH_320:
+ /* ccfs1 takes the full-width center, ccfs0 moves 16 towards control */
+ he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0;
+ he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16;
+ he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ /* ccfs1 takes the full-width center, ccfs0 moves 8 towards control */
+ he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0;
+ he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -8 : 8;
+ /* shares the 160 MHz control value with 80+80 */
+ fallthrough;
+ case NL80211_CHAN_WIDTH_80P80:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ;
+ break;
+ default:
+ he._6ghz_oper.control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ;
+ break;
+ }
+
+ if (conn->mode < IEEE80211_CONN_MODE_EHT) {
+ eht_oper = NULL;
+ } else {
+ /* the EHT info mirrors the control/ccfs values computed for HE */
+ eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT;
+ eht._oper_info.control = he._6ghz_oper.control;
+ eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0;
+ eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1;
+ eht_oper = &eht._oper;
+ }
+
+ if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper,
+ eht_oper, chandef))
+ chandef->chan = NULL;
+}
+
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- ieee80211_conn_flags_t conn_flags, u8 *bssid,
+ struct ieee80211_conn_settings *conn,
+ u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
enum nl80211_band new_band = current_band;
int new_freq;
- u8 new_chan_no;
+ u8 new_chan_no = 0, new_op_class = 0;
struct ieee80211_channel *new_chan;
- struct cfg80211_chan_def new_vht_chandef = {};
+ struct cfg80211_chan_def new_chandef = {};
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
const struct ieee80211_bandwidth_indication *bwi;
+ const struct ieee80211_ext_chansw_ie *ext_chansw_elem;
int secondary_channel_offset = -1;
memset(csa_ie, 0, sizeof(*csa_ie));
@@ -41,36 +242,41 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
sec_chan_offs = elems->sec_chan_offs;
wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
bwi = elems->bandwidth_indication;
+ ext_chansw_elem = elems->ext_chansw_ie;
- if (conn_flags & (IEEE80211_CONN_DISABLE_HT |
- IEEE80211_CONN_DISABLE_40MHZ)) {
+ if (conn->mode < IEEE80211_CONN_MODE_HT ||
+ conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
sec_chan_offs = NULL;
wide_bw_chansw_ie = NULL;
}
- if (conn_flags & IEEE80211_CONN_DISABLE_VHT)
+ if (conn->mode < IEEE80211_CONN_MODE_VHT)
wide_bw_chansw_ie = NULL;
- if (elems->ext_chansw_ie) {
- if (!ieee80211_operating_class_to_band(
- elems->ext_chansw_ie->new_operating_class,
- &new_band)) {
- sdata_info(sdata,
- "cannot understand ECSA IE operating class, %d, ignoring\n",
- elems->ext_chansw_ie->new_operating_class);
+ if (ext_chansw_elem) {
+ new_op_class = ext_chansw_elem->new_operating_class;
+
+ if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) {
+ new_op_class = 0;
+ sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n",
+ ext_chansw_elem->new_operating_class);
+ } else {
+ new_chan_no = ext_chansw_elem->new_ch_num;
+ csa_ie->count = ext_chansw_elem->count;
+ csa_ie->mode = ext_chansw_elem->mode;
}
- new_chan_no = elems->ext_chansw_ie->new_ch_num;
- csa_ie->count = elems->ext_chansw_ie->count;
- csa_ie->mode = elems->ext_chansw_ie->mode;
- } else if (elems->ch_switch_ie) {
+ }
+
+ if (!new_op_class && elems->ch_switch_ie) {
new_chan_no = elems->ch_switch_ie->new_ch_num;
csa_ie->count = elems->ch_switch_ie->count;
csa_ie->mode = elems->ch_switch_ie->mode;
- } else {
- /* nothing here we understand */
- return 1;
}
+ /* nothing here we understand */
+ if (!new_chan_no)
+ return 1;
+
/* Mesh Channel Switch Parameters Element */
if (elems->mesh_chansw_params_ie) {
csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl;
@@ -95,7 +301,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
if (sec_chan_offs) {
secondary_channel_offset = sec_chan_offs->sec_chan_offs;
- } else if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ } else if (conn->mode >= IEEE80211_CONN_MODE_HT) {
/* If the secondary channel offset IE is not present,
* we can't know what's the post-CSA offset, so the
* best we can do is use 20MHz.
@@ -107,26 +313,26 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
default:
/* secondary_channel_offset was present but is invalid */
case IEEE80211_HT_PARAM_CHA_SEC_NONE:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT20);
break;
case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT40PLUS);
break;
case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_HT40MINUS);
break;
case -1:
- cfg80211_chandef_create(&csa_ie->chandef, new_chan,
+ cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan,
NL80211_CHAN_NO_HT);
/* keep width for 5/10 MHz channels */
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (sdata->vif.bss_conf.chanreq.oper.width) {
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
- csa_ie->chandef.width =
- sdata->vif.bss_conf.chandef.width;
+ csa_ie->chanreq.oper.width =
+ sdata->vif.bss_conf.chanreq.oper.width;
break;
default:
break;
@@ -134,59 +340,52 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
break;
}
+ /* parse one of the Elements to build a new chandef */
+ memset(&new_chandef, 0, sizeof(new_chandef));
+ new_chandef.chan = new_chan;
if (bwi) {
/* start with the CSA one */
- new_vht_chandef = csa_ie->chandef;
+ new_chandef = csa_ie->chanreq.oper;
/* and update the width accordingly */
- /* FIXME: support 160/320 */
- ieee80211_chandef_eht_oper(&bwi->info, true, true,
- &new_vht_chandef);
- } else if (wide_bw_chansw_ie) {
- u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1;
- struct ieee80211_vht_operation vht_oper = {
- .chan_width =
- wide_bw_chansw_ie->new_channel_width,
- .center_freq_seg0_idx =
- wide_bw_chansw_ie->new_center_freq_seg0,
- .center_freq_seg1_idx = new_seg1,
- /* .basic_mcs_set doesn't matter */
- };
- struct ieee80211_ht_operation ht_oper = {
- .operation_mode =
- cpu_to_le16(new_seg1 <<
- IEEE80211_HT_OP_MODE_CCFS2_SHIFT),
- };
-
- /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
- * to the previously parsed chandef
- */
- new_vht_chandef = csa_ie->chandef;
-
- /* ignore if parsing fails */
- if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
- vht_cap_info,
- &vht_oper, &ht_oper,
- &new_vht_chandef))
- new_vht_chandef.chan = NULL;
-
- if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
- ieee80211_chandef_downgrade(&new_vht_chandef);
- if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
- ieee80211_chandef_downgrade(&new_vht_chandef);
+ ieee80211_chandef_eht_oper(&bwi->info, &new_chandef);
+
+ if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT)
+ new_chandef.punctured =
+ get_unaligned_le16(bwi->info.optional);
+ } else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie,
+ &new_chandef)) {
+ if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan,
+ &new_chandef))
+ new_chandef = csa_ie->chanreq.oper;
}
- /* if VHT data is there validate & use it */
- if (new_vht_chandef.chan) {
- if (!cfg80211_chandef_compatible(&new_vht_chandef,
- &csa_ie->chandef)) {
+ /* check if the new chandef fits the capabilities */
+ if (new_band == NL80211_BAND_6GHZ)
+ validate_chandef_by_6ghz_he_eht_oper(sdata, conn, &new_chandef);
+ else
+ validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info,
+ &new_chandef);
+
+ /* if data is there validate the bandwidth & use it */
+ if (new_chandef.chan) {
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 &&
+ new_chandef.width == NL80211_CHAN_WIDTH_320)
+ ieee80211_chandef_downgrade(&new_chandef, NULL);
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 &&
+ (new_chandef.width == NL80211_CHAN_WIDTH_80P80 ||
+ new_chandef.width == NL80211_CHAN_WIDTH_160))
+ ieee80211_chandef_downgrade(&new_chandef, NULL);
+
+ if (!cfg80211_chandef_compatible(&new_chandef,
+ &csa_ie->chanreq.oper)) {
sdata_info(sdata,
"BSS %pM: CSA has inconsistent channel data, disconnecting\n",
bssid);
return -EINVAL;
}
- csa_ie->chandef = new_vht_chandef;
+
+ csa_ie->chanreq.oper = new_chandef;
}
if (elems->max_channel_switch_time)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4391d8dd63..aa22f09e6d 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1566,7 +1566,8 @@ void sta_info_stop(struct ieee80211_local *local)
}
-int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
+int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans,
+ int link_id)
{
struct ieee80211_local *local = sdata->local;
struct sta_info *sta, *tmp;
@@ -1580,12 +1581,18 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
WARN_ON(vlans && !sdata->bss);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
- if (sdata == sta->sdata ||
- (vlans && sdata->bss == sta->sdata->bss)) {
- if (!WARN_ON(__sta_info_destroy_part1(sta)))
- list_add(&sta->free_list, &free_list);
- ret++;
- }
+ if (sdata != sta->sdata &&
+ (!vlans || sdata->bss != sta->sdata->bss))
+ continue;
+
+ if (link_id >= 0 && sta->sta.valid_links &&
+ !(sta->sta.valid_links & BIT(link_id)))
+ continue;
+
+ if (!WARN_ON(__sta_info_destroy_part1(sta)))
+ list_add(&sta->free_list, &free_list);
+
+ ret++;
}
if (!list_empty(&free_list)) {
@@ -1717,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
skb_queue_head_init(&pending);
/* sync with ieee80211_tx_h_unicast_ps_buf */
- spin_lock(&sta->ps_lock);
+ spin_lock_bh(&sta->ps_lock);
/* Send all buffered frames to the station */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;
@@ -1746,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
*/
clear_sta_flag(sta, WLAN_STA_PSPOLL);
clear_sta_flag(sta, WLAN_STA_UAPSD);
- spin_unlock(&sta->ps_lock);
+ spin_unlock_bh(&sta->ps_lock);
atomic_dec(&ps->num_sta_ps);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ac4c7a6f96..9195d5a2de 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -727,6 +727,12 @@ struct sta_info {
struct ieee80211_sta sta;
};
+static inline int ieee80211_tdls_sta_link_id(struct sta_info *sta)
+{
+ /* TDLS STA can only have a single link */
+ return sta->sta.valid_links ? __ffs(sta->sta.valid_links) : 0;
+}
+
static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta)
{
#ifdef CONFIG_MAC80211_MESH
@@ -886,23 +892,31 @@ void sta_info_stop(struct ieee80211_local *local);
/**
* __sta_info_flush - flush matching STA entries from the STA table
*
- * Returns the number of removed STA entries.
+ * Return: the number of removed STA entries.
*
* @sdata: sdata to remove all stations from
* @vlans: if the given interface is an AP interface, also flush VLANs
+ * @link_id: if given (>=0), all those STA entries using @link_id only
+ * will be removed. If -1 is passed, all STA entries will be
+ * removed.
*/
-int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans);
+int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans,
+ int link_id);
/**
* sta_info_flush - flush matching STA entries from the STA table
*
- * Returns the number of removed STA entries.
+ * Return: the number of removed STA entries.
*
* @sdata: sdata to remove all stations from
+ * @link_id: if given (>=0), all those STA entries using @link_id only
+ * will be removed. If -1 is passed, all STA entries will be
+ * removed.
*/
-static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata)
+static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata,
+ int link_id)
{
- return __sta_info_flush(sdata, false);
+ return __sta_info_flush(sdata, false, link_id);
}
void sta_set_rate_info_tx(struct sta_info *sta,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 1708b33cdc..dd8f857a1f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2021-2023 Intel Corporation
+ * Copyright 2021-2024 Intel Corporation
*/
#include <linux/export.h>
@@ -696,6 +696,23 @@ static void ieee80211_handle_smps_status(struct ieee80211_sub_if_data *sdata,
wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.recalc_smps);
}
+static void
+ieee80211_handle_teardown_ttlm_status(struct ieee80211_sub_if_data *sdata,
+ bool acked)
+{
+ if (!sdata || !ieee80211_sdata_running(sdata))
+ return;
+
+ if (!acked)
+ return;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.teardown_ttlm_work);
+}
+
static void ieee80211_report_used_skb(struct ieee80211_local *local,
struct sk_buff *skb, bool dropped,
ktime_t ack_hwtstamp)
@@ -773,6 +790,9 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
ieee80211_handle_smps_status(sdata, acked,
info->status_data);
break;
+ case IEEE80211_STATUS_TYPE_NEG_TTLM:
+ ieee80211_handle_teardown_ttlm_status(sdata, acked);
+ break;
}
rcu_read_unlock();
}
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 49730b4241..f07b409164 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -6,7 +6,7 @@
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
* Copyright 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2019, 2021-2023 Intel Corporation
+ * Copyright (C) 2019, 2021-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -159,7 +159,7 @@ static void ieee80211_tdls_add_oper_classes(struct ieee80211_link_data *link,
u8 *pos;
u8 op_class;
- if (!ieee80211_chandef_to_operating_class(&link->conf->chandef,
+ if (!ieee80211_chandef_to_operating_class(&link->conf->chanreq.oper,
&op_class))
return;
@@ -347,7 +347,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
(uc.width > sta->tdls_chandef.width &&
!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
sdata->wdev.iftype)))
- ieee80211_chandef_downgrade(&uc);
+ ieee80211_chandef_downgrade(&uc, NULL);
if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
tdls_dbg(sdata, "TDLS ch width upgraded %d -> %d\n",
@@ -382,8 +382,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sband))
return;
- ieee80211_add_srates_ie(sdata, skb, false, sband->band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, sband->band);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_SUPP_RATES);
+ ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_EXT_SUPP_RATES);
ieee80211_tdls_add_supp_channels(sdata, skb);
/* add any custom IEs that go before Extended Capabilities */
@@ -438,7 +438,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sta))
return;
- sta->tdls_chandef = link->conf->chandef;
+ sta->tdls_chandef = link->conf->chanreq.oper;
}
ieee80211_tdls_add_oper_classes(link, skb);
@@ -548,30 +548,14 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
}
/* build the HE-cap from sband */
- if (he_cap &&
- (action_code == WLAN_TDLS_SETUP_REQUEST ||
- action_code == WLAN_TDLS_SETUP_RESPONSE ||
- action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) {
- __le16 he_6ghz_capa;
- u8 cap_size;
-
- cap_size =
- 2 + 1 + sizeof(he_cap->he_cap_elem) +
- ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
- ieee80211_he_ppe_size(he_cap->ppe_thres[0],
- he_cap->he_cap_elem.phy_cap_info);
- pos = skb_put(skb, cap_size);
- pos = ieee80211_ie_build_he_cap(0, pos, he_cap, pos + cap_size);
+ if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+ action_code == WLAN_TDLS_SETUP_RESPONSE ||
+ action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) {
+ ieee80211_put_he_cap(skb, sdata, sband, NULL);
/* Build HE 6Ghz capa IE from sband */
- if (sband->band == NL80211_BAND_6GHZ) {
- cap_size = 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa);
- pos = skb_put(skb, cap_size);
- he_6ghz_capa =
- ieee80211_get_he_6ghz_capa_vif(sband, &sdata->vif);
- pos = ieee80211_write_he_6ghz_cap(pos, he_6ghz_capa,
- pos + cap_size);
- }
+ if (sband->band == NL80211_BAND_6GHZ)
+ ieee80211_put_he_6ghz_cap(skb, sdata, link->smps_mode);
}
/* add any custom IEs that go before EHT capabilities */
@@ -591,21 +575,10 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link,
}
/* build the EHT-cap from sband */
- if (he_cap && eht_cap &&
- (action_code == WLAN_TDLS_SETUP_REQUEST ||
- action_code == WLAN_TDLS_SETUP_RESPONSE ||
- action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) {
- u8 cap_size;
-
- cap_size =
- 2 + 1 + sizeof(eht_cap->eht_cap_elem) +
- ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem, false) +
- ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
- pos = skb_put(skb, cap_size);
- ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + cap_size, false);
- }
+ if (action_code == WLAN_TDLS_SETUP_REQUEST ||
+ action_code == WLAN_TDLS_SETUP_RESPONSE ||
+ action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)
+ ieee80211_put_eht_cap(skb, sdata, sband, NULL);
/* add any remaining IEs */
if (extra_ies_len) {
@@ -638,7 +611,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link,
if (WARN_ON_ONCE(!sta || !ap_sta))
return;
- sta->tdls_chandef = link->conf->chandef;
+ sta->tdls_chandef = link->conf->chanreq.oper;
/* add any custom IEs that go before the QoS IE */
if (extra_ies_len) {
@@ -684,7 +657,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link,
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap,
- &link->conf->chandef, prot,
+ &link->conf->chanreq.oper, prot,
true);
}
@@ -1413,8 +1386,8 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
u16 opmode;
- /* Nothing to do if the BSS connection uses HT */
- if (!(sdata->deflink.u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
+ /* Nothing to do if the BSS connection uses (at least) HT */
+ if (sdata->deflink.u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT)
return;
tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) ||
@@ -2055,8 +2028,9 @@ ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
}
}
-void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
+void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct sta_info *sta;
u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
@@ -2066,6 +2040,9 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
continue;
+ if (sta->deflink.link_id != link->link_id)
+ continue;
+
ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
NL80211_TDLS_TEARDOWN, reason,
GFP_ATOMIC);
diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c
index 997d0cd27b..a413ba29f7 100644
--- a/net/mac80211/tests/elems.c
+++ b/net/mac80211/tests/elems.c
@@ -2,7 +2,7 @@
/*
* KUnit tests for element parsing
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#include <kunit/test.h>
#include "../ieee80211_i.h"
@@ -14,6 +14,7 @@ static void mle_defrag(struct kunit *test)
struct ieee80211_elems_parse_params parse_params = {
.link_id = 12,
.from_ap = true,
+ .mode = IEEE80211_CONN_MODE_EHT,
};
struct ieee802_11_elems *parsed;
struct sk_buff *skb;
@@ -68,7 +69,7 @@ static void mle_defrag(struct kunit *test)
if (IS_ERR_OR_NULL(parsed))
goto free_skb;
- KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem);
+ KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic);
KUNIT_EXPECT_EQ(test,
parsed->ml_basic_len,
2 /* control */ +
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 06835ed4c4..b26aacfbc6 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -33,7 +33,7 @@
__string(vif_name, sdata->name)
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->name)
+ __assign_str(vif_name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
@@ -50,7 +50,7 @@
__entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \
__entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \
__entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
-#define CHANDEF_PR_FMT " control:%d.%03d MHz width:%d center: %d.%03d/%d MHz"
+#define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \
__entry->center_freq1, __entry->freq1_offset, __entry->center_freq2
@@ -69,22 +69,45 @@
__entry->min_center_freq1 = (c)->center_freq1; \
__entry->min_freq1_offset = (c)->freq1_offset; \
__entry->min_center_freq2 = (c)->center_freq2;
-#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
+#define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \
__entry->min_chan_width, \
__entry->min_center_freq1, __entry->min_freq1_offset, \
__entry->min_center_freq2
+#define AP_CHANDEF_ENTRY \
+ __field(u32, ap_control_freq) \
+ __field(u32, ap_freq_offset) \
+ __field(u32, ap_chan_width) \
+ __field(u32, ap_center_freq1) \
+ __field(u32, ap_freq1_offset) \
+ __field(u32, ap_center_freq2)
+
+#define AP_CHANDEF_ASSIGN(c) \
+ __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\
+ __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\
+ __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \
+ __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \
+ __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \
+ __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0;
+#define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)"
+#define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \
+ __entry->ap_chan_width, \
+ __entry->ap_center_freq1, __entry->ap_freq1_offset, \
+ __entry->ap_center_freq2
+
#define CHANCTX_ENTRY CHANDEF_ENTRY \
MIN_CHANDEF_ENTRY \
+ AP_CHANDEF_ENTRY \
__field(u8, rx_chains_static) \
__field(u8, rx_chains_dynamic)
#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \
MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \
+ AP_CHANDEF_ASSIGN(&ctx->conf.ap) \
__entry->rx_chains_static = ctx->conf.rx_chains_static; \
__entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic
-#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT " chains:%d/%d"
-#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \
+#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT AP_CHANDEF_PR_FMT " chains:%d/%d"
+#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, AP_CHANDEF_PR_ARG, \
__entry->rx_chains_static, __entry->rx_chains_dynamic
#define KEY_ENTRY __field(u32, cipher) \
@@ -503,9 +526,9 @@ TRACE_EVENT(drv_link_info_changed,
__entry->ht_operation_mode = link_conf->ht_operation_mode;
__entry->cqm_rssi_thold = link_conf->cqm_rssi_thold;
__entry->cqm_rssi_hyst = link_conf->cqm_rssi_hyst;
- __entry->channel_width = link_conf->chandef.width;
- __entry->channel_cfreq1 = link_conf->chandef.center_freq1;
- __entry->channel_cfreq1_offset = link_conf->chandef.freq1_offset;
+ __entry->channel_width = link_conf->chanreq.oper.width;
+ __entry->channel_cfreq1 = link_conf->chanreq.oper.center_freq1;
+ __entry->channel_cfreq1_offset = link_conf->chanreq.oper.freq1_offset;
__entry->qos = link_conf->qos;
__entry->hidden_ssid = link_conf->hidden_ssid;
__entry->txpower = link_conf->txpower;
@@ -1186,7 +1209,7 @@ DEFINE_EVENT(sta_event, drv_flush_sta,
TP_ARGS(local, sdata, sta)
);
-TRACE_EVENT(drv_channel_switch,
+DECLARE_EVENT_CLASS(chanswitch_evt,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
@@ -1201,6 +1224,7 @@ TRACE_EVENT(drv_channel_switch,
__field(u32, device_timestamp)
__field(bool, block_tx)
__field(u8, count)
+ __field(u8, link_id)
),
TP_fast_assign(
@@ -1211,14 +1235,24 @@ TRACE_EVENT(drv_channel_switch,
__entry->device_timestamp = ch_switch->device_timestamp;
__entry->block_tx = ch_switch->block_tx;
__entry->count = ch_switch->count;
+ __entry->link_id = ch_switch->link_id;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " new " CHANDEF_PR_FMT " count:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count
+ LOCAL_PR_FMT VIF_PR_FMT CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu device_ts:%u link_id:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+ __entry->block_tx, __entry->timestamp,
+ __entry->device_timestamp, __entry->link_id
)
);
+DEFINE_EVENT(chanswitch_evt, drv_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel_switch *ch_switch),
+ TP_ARGS(local, sdata, ch_switch)
+);
+
TRACE_EVENT(drv_set_antenna,
TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
@@ -2098,39 +2132,11 @@ TRACE_EVENT(drv_channel_switch_beacon,
)
);
-TRACE_EVENT(drv_pre_channel_switch,
+DEFINE_EVENT(chanswitch_evt, drv_pre_channel_switch,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
-
- TP_ARGS(local, sdata, ch_switch),
-
- TP_STRUCT__entry(
- LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
- ),
-
- TP_fast_assign(
- LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
- ),
-
- TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " prepare channel switch to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
- )
+ TP_ARGS(local, sdata, ch_switch)
);
DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch,
@@ -2145,40 +2151,11 @@ DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
TP_ARGS(local, sdata)
);
-TRACE_EVENT(drv_channel_switch_rx_beacon,
+DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel_switch *ch_switch),
-
- TP_ARGS(local, sdata, ch_switch),
-
- TP_STRUCT__entry(
- LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
- ),
-
- TP_fast_assign(
- LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
- ),
-
- TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT
- " received a channel switch beacon to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
- )
+ TP_ARGS(local, sdata, ch_switch)
);
TRACE_EVENT(drv_get_txpower,
@@ -3035,6 +3012,34 @@ TRACE_EVENT(api_radar_detected,
)
);
+TRACE_EVENT(api_request_smps,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ enum ieee80211_smps_mode smps_mode),
+
+ TP_ARGS(local, sdata, link, smps_mode),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(int, link_id)
+ __field(u32, smps_mode)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->link_id = link->link_id,
+ __entry->smps_mode = smps_mode;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " " VIF_PR_FMT " link:%d, smps_mode:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->smps_mode
+ )
+);
+
/*
* Tracing for internal functions
* (which may also be called in response to driver calls)
@@ -3088,6 +3093,58 @@ TRACE_EVENT(stop_queue,
)
);
+TRACE_EVENT(drv_can_neg_ttlm,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_neg_ttlm *neg_ttlm),
+
+ TP_ARGS(local, sdata, neg_ttlm),
+
+ TP_STRUCT__entry(LOCAL_ENTRY
+ VIF_ENTRY
+ __array(u16, downlink, sizeof(u16) * 8)
+ __array(u16, uplink, sizeof(u16) * 8)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ memcpy(__entry->downlink, neg_ttlm->downlink,
+ sizeof(neg_ttlm->downlink));
+ memcpy(__entry->uplink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->uplink));
+ ),
+
+ TP_printk(LOCAL_PR_FMT ", " VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG)
+);
+
+TRACE_EVENT(drv_neg_ttlm_res,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_neg_ttlm_res res,
+ struct ieee80211_neg_ttlm *neg_ttlm),
+
+ TP_ARGS(local, sdata, res, neg_ttlm),
+
+ TP_STRUCT__entry(LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u32, res)
+ __array(u16, downlink, sizeof(u16) * 8)
+ __array(u16, uplink, sizeof(u16) * 8)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->res = res;
+ memcpy(__entry->downlink, neg_ttlm->downlink,
+ sizeof(neg_ttlm->downlink));
+ memcpy(__entry->uplink, neg_ttlm->uplink,
+ sizeof(neg_ttlm->uplink));
+ ),
+
+ TP_printk(LOCAL_PR_FMT VIF_PR_FMT " response: %d\n ",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->res
+ )
+);
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index c9dbe9aab7..aea4ce55c5 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -16,8 +16,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 120
-
DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_PROTO(struct va_format *vaf),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6fbb15b659..edba4a3184 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -133,6 +133,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
mrate = sband->bitrates[0].bitrate;
for (i = 0; i < sband->n_bitrates; i++) {
struct ieee80211_rate *r = &sband->bitrates[i];
+ u32 flag;
if (r->bitrate > txrate->bitrate)
break;
@@ -145,28 +146,24 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
switch (sband->band) {
case NL80211_BAND_2GHZ:
- case NL80211_BAND_LC: {
- u32 flag;
+ case NL80211_BAND_LC:
if (tx->sdata->deflink.operating_11g_mode)
flag = IEEE80211_RATE_MANDATORY_G;
else
flag = IEEE80211_RATE_MANDATORY_B;
- if (r->flags & flag)
- mrate = r->bitrate;
break;
- }
case NL80211_BAND_5GHZ:
case NL80211_BAND_6GHZ:
- if (r->flags & IEEE80211_RATE_MANDATORY_A)
- mrate = r->bitrate;
+ flag = IEEE80211_RATE_MANDATORY_A;
break;
- case NL80211_BAND_S1GHZ:
- case NL80211_BAND_60GHZ:
- /* TODO, for now fall through */
- case NUM_NL80211_BANDS:
+ default:
+ flag = 0;
WARN_ON(1);
break;
}
+
+ if (r->flags & flag)
+ mrate = r->bitrate;
}
if (rate == -1) {
/* No matching basic rate found; use highest suitable mandatory
@@ -701,11 +698,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.bss_conf = &tx->sdata->vif.bss_conf;
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
- if (tx->sdata->rc_has_mcs_mask[info->band])
- txrc.rate_idx_mcs_mask =
- tx->sdata->rc_rateidx_mcs_mask[info->band];
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ txrc.rate_idx_mask = ~0;
+ } else {
+ txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
+
+ if (tx->sdata->rc_has_mcs_mask[info->band])
+ txrc.rate_idx_mcs_mask =
+ tx->sdata->rc_rateidx_mcs_mask[info->band];
+ }
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
@@ -1607,8 +1609,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
local->cparams.target = MS2TIME(20);
local->cparams.ecn = true;
- local->cvars = kcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
- GFP_KERNEL);
+ local->cvars = kvcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
+ GFP_KERNEL);
if (!local->cvars) {
spin_lock_bh(&fq->lock);
fq_reset(fq, fq_skb_free_func);
@@ -1628,7 +1630,7 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
{
struct fq *fq = &local->fq;
- kfree(local->cvars);
+ kvfree(local->cvars);
local->cvars = NULL;
spin_lock_bh(&fq->lock);
@@ -1766,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
break;
}
sdata = rcu_dereference(local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
vif = &sdata->vif;
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
@@ -2393,12 +2395,18 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
if (chanctx_conf)
chandef = &chanctx_conf->def;
- else if (!local->use_chanctx)
- chandef = &local->_oper_chandef;
else
goto fail_rcu;
/*
+ * If driver/HW supports IEEE80211_CHAN_CAN_MONITOR we still
+ * shouldn't transmit on disabled channels.
+ */
+ if (!cfg80211_chandef_usable(local->hw.wiphy, chandef,
+ IEEE80211_CHAN_DISABLED))
+ goto fail_rcu;
+
+ /*
* Frame injection is not allowed if beaconing is not allowed
* or if we need radar detection. Beaconing is usually not allowed when
* the mode or operation (Adhoc, AP, Mesh) does not support DFS.
@@ -2766,8 +2774,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
if (tdls_peer) {
/* For TDLS only one link can be valid with peer STA */
- int tdls_link_id = sta->sta.valid_links ?
- __ffs(sta->sta.valid_links) : 0;
+ int tdls_link_id = ieee80211_tdls_sta_link_id(sta);
struct ieee80211_link_data *link;
/* DA SA BSSID */
@@ -3093,8 +3100,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
case NL80211_IFTYPE_STATION:
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
/* For TDLS only one link can be valid with peer STA */
- int tdls_link_id = sta->sta.valid_links ?
- __ffs(sta->sta.valid_links) : 0;
+ int tdls_link_id = ieee80211_tdls_sta_link_id(sta);
struct ieee80211_link_data *link;
/* DA SA BSSID */
@@ -3951,7 +3957,8 @@ begin:
break;
}
tx.sdata = rcu_dereference(local->monitor_sdata);
- if (tx.sdata) {
+ if (tx.sdata &&
+ ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
vif = &tx.sdata->vif;
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
@@ -3959,7 +3966,8 @@ begin:
ieee80211_free_txskb(&local->hw, skb);
goto begin;
} else {
- vif = NULL;
+ info->control.vif = NULL;
+ return skb;
}
break;
case NL80211_IFTYPE_AP_VLAN:
@@ -5032,16 +5040,24 @@ static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
return beacon->cntdwn_current_counter;
}
-u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif)
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
struct beacon_data *beacon = NULL;
u8 count = 0;
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return 0;
+
rcu_read_lock();
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ goto unlock;
+
if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
+ beacon = rcu_dereference(link->u.ap.beacon);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
beacon = rcu_dereference(sdata->u.ibss.presp);
else if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -5083,9 +5099,11 @@ unlock:
}
EXPORT_SYMBOL(ieee80211_beacon_set_cntdwn);
-bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif,
+ unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
struct beacon_data *beacon = NULL;
u8 *beacon_data;
size_t beacon_data_len;
@@ -5094,9 +5112,17 @@ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
if (!ieee80211_sdata_running(sdata))
return false;
+ if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
+ return 0;
+
rcu_read_lock();
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ goto out;
+
if (vif->type == NL80211_IFTYPE_AP) {
- beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
+ beacon = rcu_dereference(link->u.ap.beacon);
if (WARN_ON(!beacon || !beacon->tail))
goto out;
beacon_data = beacon->tail;
@@ -5282,7 +5308,7 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- ieee80211_beacon_update_cntdwn(vif);
+ ieee80211_beacon_update_cntdwn(vif, link->link_id);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 643c54855b..c11dbe82ae 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* utilities for mac80211
*/
@@ -46,6 +46,11 @@ struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_to_ieee80211_hw);
+const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited = {
+ .mode = IEEE80211_CONN_MODE_EHT,
+ .bw_limit = IEEE80211_CONN_BW_LIMIT_320,
+};
+
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
enum nl80211_iftype type)
{
@@ -771,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local,
sdata = rcu_dereference_check(local->monitor_sdata,
lockdep_is_held(&local->iflist_mtx) ||
lockdep_is_held(&local->hw.wiphy->mtx));
- if (sdata &&
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
sdata->flags & IEEE80211_SDATA_IN_DRIVER))
iterator(data, sdata->vif.addr, &sdata->vif);
@@ -912,776 +917,6 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_queue_delayed_work);
-static void
-ieee80211_parse_extension_element(u32 *crc,
- const struct element *elem,
- struct ieee802_11_elems *elems,
- struct ieee80211_elems_parse_params *params)
-{
- const void *data = elem->data + 1;
- bool calc_crc = false;
- u8 len;
-
- if (!elem->datalen)
- return;
-
- len = elem->datalen - 1;
-
- switch (elem->data[0]) {
- case WLAN_EID_EXT_HE_MU_EDCA:
- calc_crc = true;
- if (len >= sizeof(*elems->mu_edca_param_set))
- elems->mu_edca_param_set = data;
- break;
- case WLAN_EID_EXT_HE_CAPABILITY:
- if (ieee80211_he_capa_size_ok(data, len)) {
- elems->he_cap = data;
- elems->he_cap_len = len;
- }
- break;
- case WLAN_EID_EXT_HE_OPERATION:
- calc_crc = true;
- if (len >= sizeof(*elems->he_operation) &&
- len >= ieee80211_he_oper_size(data) - 1)
- elems->he_operation = data;
- break;
- case WLAN_EID_EXT_UORA:
- if (len >= 1)
- elems->uora_element = data;
- break;
- case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
- if (len == 3)
- elems->max_channel_switch_time = data;
- break;
- case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
- if (len >= sizeof(*elems->mbssid_config_ie))
- elems->mbssid_config_ie = data;
- break;
- case WLAN_EID_EXT_HE_SPR:
- if (len >= sizeof(*elems->he_spr) &&
- len >= ieee80211_he_spr_size(data))
- elems->he_spr = data;
- break;
- case WLAN_EID_EXT_HE_6GHZ_CAPA:
- if (len >= sizeof(*elems->he_6ghz_capa))
- elems->he_6ghz_capa = data;
- break;
- case WLAN_EID_EXT_EHT_CAPABILITY:
- if (ieee80211_eht_capa_size_ok(elems->he_cap,
- data, len,
- params->from_ap)) {
- elems->eht_cap = data;
- elems->eht_cap_len = len;
- }
- break;
- case WLAN_EID_EXT_EHT_OPERATION:
- if (ieee80211_eht_oper_size_ok(data, len))
- elems->eht_operation = data;
- calc_crc = true;
- break;
- case WLAN_EID_EXT_EHT_MULTI_LINK:
- calc_crc = true;
-
- if (ieee80211_mle_size_ok(data, len)) {
- const struct ieee80211_multi_link_elem *mle =
- (void *)data;
-
- switch (le16_get_bits(mle->control,
- IEEE80211_ML_CONTROL_TYPE)) {
- case IEEE80211_ML_CONTROL_TYPE_BASIC:
- elems->ml_basic_elem = (void *)elem;
- elems->ml_basic = data;
- elems->ml_basic_len = len;
- break;
- case IEEE80211_ML_CONTROL_TYPE_RECONF:
- elems->ml_reconf_elem = (void *)elem;
- elems->ml_reconf = data;
- elems->ml_reconf_len = len;
- break;
- default:
- break;
- }
- }
- break;
- case WLAN_EID_EXT_BANDWIDTH_INDICATION:
- if (ieee80211_bandwidth_indication_size_ok(data, len))
- elems->bandwidth_indication = data;
- calc_crc = true;
- break;
- case WLAN_EID_EXT_TID_TO_LINK_MAPPING:
- calc_crc = true;
- if (ieee80211_tid_to_link_map_size_ok(data, len) &&
- elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) {
- elems->ttlm[elems->ttlm_num] = (void *)data;
- elems->ttlm_num++;
- }
- break;
- }
-
- if (crc && calc_crc)
- *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2);
-}
-
-static u32
-_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
- struct ieee802_11_elems *elems,
- const struct element *check_inherit)
-{
- const struct element *elem;
- bool calc_crc = params->filter != 0;
- DECLARE_BITMAP(seen_elems, 256);
- u32 crc = params->crc;
-
- bitmap_zero(seen_elems, 256);
-
- for_each_element(elem, params->start, params->len) {
- const struct element *subelem;
- bool elem_parse_failed;
- u8 id = elem->id;
- u8 elen = elem->datalen;
- const u8 *pos = elem->data;
-
- if (check_inherit &&
- !cfg80211_is_element_inherited(elem,
- check_inherit))
- continue;
-
- switch (id) {
- case WLAN_EID_SSID:
- case WLAN_EID_SUPP_RATES:
- case WLAN_EID_FH_PARAMS:
- case WLAN_EID_DS_PARAMS:
- case WLAN_EID_CF_PARAMS:
- case WLAN_EID_TIM:
- case WLAN_EID_IBSS_PARAMS:
- case WLAN_EID_CHALLENGE:
- case WLAN_EID_RSN:
- case WLAN_EID_ERP_INFO:
- case WLAN_EID_EXT_SUPP_RATES:
- case WLAN_EID_HT_CAPABILITY:
- case WLAN_EID_HT_OPERATION:
- case WLAN_EID_VHT_CAPABILITY:
- case WLAN_EID_VHT_OPERATION:
- case WLAN_EID_MESH_ID:
- case WLAN_EID_MESH_CONFIG:
- case WLAN_EID_PEER_MGMT:
- case WLAN_EID_PREQ:
- case WLAN_EID_PREP:
- case WLAN_EID_PERR:
- case WLAN_EID_RANN:
- case WLAN_EID_CHANNEL_SWITCH:
- case WLAN_EID_EXT_CHANSWITCH_ANN:
- case WLAN_EID_COUNTRY:
- case WLAN_EID_PWR_CONSTRAINT:
- case WLAN_EID_TIMEOUT_INTERVAL:
- case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
- case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
- case WLAN_EID_CHAN_SWITCH_PARAM:
- case WLAN_EID_EXT_CAPABILITY:
- case WLAN_EID_CHAN_SWITCH_TIMING:
- case WLAN_EID_LINK_ID:
- case WLAN_EID_BSS_MAX_IDLE_PERIOD:
- case WLAN_EID_RSNX:
- case WLAN_EID_S1G_BCN_COMPAT:
- case WLAN_EID_S1G_CAPABILITIES:
- case WLAN_EID_S1G_OPERATION:
- case WLAN_EID_AID_RESPONSE:
- case WLAN_EID_S1G_SHORT_BCN_INTERVAL:
- /*
- * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
- * that if the content gets bigger it might be needed more than once
- */
- if (test_bit(id, seen_elems)) {
- elems->parse_error = true;
- continue;
- }
- break;
- }
-
- if (calc_crc && id < 64 && (params->filter & (1ULL << id)))
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- elem_parse_failed = false;
-
- switch (id) {
- case WLAN_EID_LINK_ID:
- if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
- elem_parse_failed = true;
- break;
- }
- elems->lnk_id = (void *)(pos - 2);
- break;
- case WLAN_EID_CHAN_SWITCH_TIMING:
- if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
- elem_parse_failed = true;
- break;
- }
- elems->ch_sw_timing = (void *)pos;
- break;
- case WLAN_EID_EXT_CAPABILITY:
- elems->ext_capab = pos;
- elems->ext_capab_len = elen;
- break;
- case WLAN_EID_SSID:
- elems->ssid = pos;
- elems->ssid_len = elen;
- break;
- case WLAN_EID_SUPP_RATES:
- elems->supp_rates = pos;
- elems->supp_rates_len = elen;
- break;
- case WLAN_EID_DS_PARAMS:
- if (elen >= 1)
- elems->ds_params = pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_TIM:
- if (elen >= sizeof(struct ieee80211_tim_ie)) {
- elems->tim = (void *)pos;
- elems->tim_len = elen;
- } else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VENDOR_SPECIFIC:
- if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
- pos[2] == 0xf2) {
- /* Microsoft OUI (00:50:F2) */
-
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- if (elen >= 5 && pos[3] == 2) {
- /* OUI Type 2 - WMM IE */
- if (pos[4] == 0) {
- elems->wmm_info = pos;
- elems->wmm_info_len = elen;
- } else if (pos[4] == 1) {
- elems->wmm_param = pos;
- elems->wmm_param_len = elen;
- }
- }
- }
- break;
- case WLAN_EID_RSN:
- elems->rsn = pos;
- elems->rsn_len = elen;
- break;
- case WLAN_EID_ERP_INFO:
- if (elen >= 1)
- elems->erp_info = pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_EXT_SUPP_RATES:
- elems->ext_supp_rates = pos;
- elems->ext_supp_rates_len = elen;
- break;
- case WLAN_EID_HT_CAPABILITY:
- if (elen >= sizeof(struct ieee80211_ht_cap))
- elems->ht_cap_elem = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_HT_OPERATION:
- if (elen >= sizeof(struct ieee80211_ht_operation))
- elems->ht_operation = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VHT_CAPABILITY:
- if (elen >= sizeof(struct ieee80211_vht_cap))
- elems->vht_cap_elem = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_VHT_OPERATION:
- if (elen >= sizeof(struct ieee80211_vht_operation)) {
- elems->vht_operation = (void *)pos;
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
- break;
- }
- elem_parse_failed = true;
- break;
- case WLAN_EID_OPMODE_NOTIF:
- if (elen > 0) {
- elems->opmode_notif = pos;
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
- break;
- }
- elem_parse_failed = true;
- break;
- case WLAN_EID_MESH_ID:
- elems->mesh_id = pos;
- elems->mesh_id_len = elen;
- break;
- case WLAN_EID_MESH_CONFIG:
- if (elen >= sizeof(struct ieee80211_meshconf_ie))
- elems->mesh_config = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_PEER_MGMT:
- elems->peering = pos;
- elems->peering_len = elen;
- break;
- case WLAN_EID_MESH_AWAKE_WINDOW:
- if (elen >= 2)
- elems->awake_window = (void *)pos;
- break;
- case WLAN_EID_PREQ:
- elems->preq = pos;
- elems->preq_len = elen;
- break;
- case WLAN_EID_PREP:
- elems->prep = pos;
- elems->prep_len = elen;
- break;
- case WLAN_EID_PERR:
- elems->perr = pos;
- elems->perr_len = elen;
- break;
- case WLAN_EID_RANN:
- if (elen >= sizeof(struct ieee80211_rann_ie))
- elems->rann = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_CHANNEL_SWITCH:
- if (elen != sizeof(struct ieee80211_channel_sw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->ch_switch_ie = (void *)pos;
- break;
- case WLAN_EID_EXT_CHANSWITCH_ANN:
- if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->ext_chansw_ie = (void *)pos;
- break;
- case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
- if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->sec_chan_offs = (void *)pos;
- break;
- case WLAN_EID_CHAN_SWITCH_PARAM:
- if (elen <
- sizeof(*elems->mesh_chansw_params_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->mesh_chansw_params_ie = (void *)pos;
- break;
- case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
- if (!params->action ||
- elen < sizeof(*elems->wide_bw_chansw_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->wide_bw_chansw_ie = (void *)pos;
- break;
- case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
- if (params->action) {
- elem_parse_failed = true;
- break;
- }
- /*
- * This is a bit tricky, but as we only care about
- * a few elements, parse them out manually.
- */
- subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
- pos, elen);
- if (subelem) {
- if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie))
- elems->wide_bw_chansw_ie =
- (void *)subelem->data;
- else
- elem_parse_failed = true;
- }
-
- subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION,
- pos, elen);
- if (subelem) {
- const void *edata = subelem->data + 1;
- u8 edatalen = subelem->datalen - 1;
-
- if (ieee80211_bandwidth_indication_size_ok(edata,
- edatalen))
- elems->bandwidth_indication = edata;
- else
- elem_parse_failed = true;
- }
- break;
- case WLAN_EID_COUNTRY:
- elems->country_elem = pos;
- elems->country_elem_len = elen;
- break;
- case WLAN_EID_PWR_CONSTRAINT:
- if (elen != 1) {
- elem_parse_failed = true;
- break;
- }
- elems->pwr_constr_elem = pos;
- break;
- case WLAN_EID_CISCO_VENDOR_SPECIFIC:
- /* Lots of different options exist, but we only care
- * about the Dynamic Transmit Power Control element.
- * First check for the Cisco OUI, then for the DTPC
- * tag (0x00).
- */
- if (elen < 4) {
- elem_parse_failed = true;
- break;
- }
-
- if (pos[0] != 0x00 || pos[1] != 0x40 ||
- pos[2] != 0x96 || pos[3] != 0x00)
- break;
-
- if (elen != 6) {
- elem_parse_failed = true;
- break;
- }
-
- if (calc_crc)
- crc = crc32_be(crc, pos - 2, elen + 2);
-
- elems->cisco_dtpc_elem = pos;
- break;
- case WLAN_EID_ADDBA_EXT:
- if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
- elem_parse_failed = true;
- break;
- }
- elems->addba_ext_ie = (void *)pos;
- break;
- case WLAN_EID_TIMEOUT_INTERVAL:
- if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
- elems->timeout_int = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_BSS_MAX_IDLE_PERIOD:
- if (elen >= sizeof(*elems->max_idle_period_ie))
- elems->max_idle_period_ie = (void *)pos;
- break;
- case WLAN_EID_RSNX:
- elems->rsnx = pos;
- elems->rsnx_len = elen;
- break;
- case WLAN_EID_TX_POWER_ENVELOPE:
- if (elen < 1 ||
- elen > sizeof(struct ieee80211_tx_pwr_env))
- break;
-
- if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
- break;
-
- elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
- elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
- elems->tx_pwr_env_num++;
- break;
- case WLAN_EID_EXTENSION:
- ieee80211_parse_extension_element(calc_crc ?
- &crc : NULL,
- elem, elems, params);
- break;
- case WLAN_EID_S1G_CAPABILITIES:
- if (elen >= sizeof(*elems->s1g_capab))
- elems->s1g_capab = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_S1G_OPERATION:
- if (elen == sizeof(*elems->s1g_oper))
- elems->s1g_oper = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_S1G_BCN_COMPAT:
- if (elen == sizeof(*elems->s1g_bcn_compat))
- elems->s1g_bcn_compat = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- case WLAN_EID_AID_RESPONSE:
- if (elen == sizeof(struct ieee80211_aid_response_ie))
- elems->aid_resp = (void *)pos;
- else
- elem_parse_failed = true;
- break;
- default:
- break;
- }
-
- if (elem_parse_failed)
- elems->parse_error = true;
- else
- __set_bit(id, seen_elems);
- }
-
- if (!for_each_element_completed(elem, params->start, params->len))
- elems->parse_error = true;
-
- return crc;
-}
-
-static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
- struct ieee802_11_elems *elems,
- struct cfg80211_bss *bss,
- u8 *nontransmitted_profile)
-{
- const struct element *elem, *sub;
- size_t profile_len = 0;
- bool found = false;
-
- if (!bss || !bss->transmitted_bss)
- return profile_len;
-
- for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
- if (elem->datalen < 2)
- continue;
- if (elem->data[0] < 1 || elem->data[0] > 8)
- continue;
-
- for_each_element(sub, elem->data + 1, elem->datalen - 1) {
- u8 new_bssid[ETH_ALEN];
- const u8 *index;
-
- if (sub->id != 0 || sub->datalen < 4) {
- /* not a valid BSS profile */
- continue;
- }
-
- if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
- sub->data[1] != 2) {
- /* The first element of the
- * Nontransmitted BSSID Profile is not
- * the Nontransmitted BSSID Capability
- * element.
- */
- continue;
- }
-
- memset(nontransmitted_profile, 0, len);
- profile_len = cfg80211_merge_profile(start, len,
- elem,
- sub,
- nontransmitted_profile,
- len);
-
- /* found a Nontransmitted BSSID Profile */
- index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
- nontransmitted_profile,
- profile_len);
- if (!index || index[1] < 1 || index[2] == 0) {
- /* Invalid MBSSID Index element */
- continue;
- }
-
- cfg80211_gen_new_bssid(bss->transmitted_bss->bssid,
- elem->data[0],
- index[2],
- new_bssid);
- if (ether_addr_equal(new_bssid, bss->bssid)) {
- found = true;
- elems->bssid_index_len = index[1];
- elems->bssid_index = (void *)&index[2];
- break;
- }
- }
- }
-
- return found ? profile_len : 0;
-}
-
-static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems,
- u8 link_id)
-{
- const struct ieee80211_multi_link_elem *ml = elems->ml_basic;
- ssize_t ml_len = elems->ml_basic_len;
- const struct element *sub;
-
- if (!ml || !ml_len)
- return;
-
- if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC)
- return;
-
- for_each_mle_subelement(sub, (u8 *)ml, ml_len) {
- struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data;
- ssize_t sta_prof_len;
- u16 control;
-
- if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE)
- continue;
-
- if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data,
- sub->datalen))
- return;
-
- control = le16_to_cpu(prof->control);
-
- if (link_id != u16_get_bits(control,
- IEEE80211_MLE_STA_CONTROL_LINK_ID))
- continue;
-
- if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE))
- return;
-
- /* the sub element can be fragmented */
- sta_prof_len =
- cfg80211_defragment_element(sub,
- (u8 *)ml, ml_len,
- elems->scratch_pos,
- elems->scratch +
- elems->scratch_len -
- elems->scratch_pos,
- IEEE80211_MLE_SUBELEM_FRAGMENT);
-
- if (sta_prof_len < 0)
- return;
-
- elems->prof = (void *)elems->scratch_pos;
- elems->sta_prof_len = sta_prof_len;
- elems->scratch_pos += sta_prof_len;
-
- return;
- }
-}
-
-static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems,
- struct ieee80211_elems_parse_params *params)
-{
- struct ieee80211_mle_per_sta_profile *prof;
- struct ieee80211_elems_parse_params sub = {
- .action = params->action,
- .from_ap = params->from_ap,
- .link_id = -1,
- };
- ssize_t ml_len = elems->ml_basic_len;
- const struct element *non_inherit = NULL;
- const u8 *end;
-
- if (params->link_id == -1)
- return;
-
- ml_len = cfg80211_defragment_element(elems->ml_basic_elem,
- elems->ie_start,
- elems->total_len,
- elems->scratch_pos,
- elems->scratch +
- elems->scratch_len -
- elems->scratch_pos,
- WLAN_EID_FRAGMENT);
-
- if (ml_len < 0)
- return;
-
- elems->ml_basic = (const void *)elems->scratch_pos;
- elems->ml_basic_len = ml_len;
-
- ieee80211_mle_get_sta_prof(elems, params->link_id);
- prof = elems->prof;
-
- if (!prof)
- return;
-
- /* check if we have the 4 bytes for the fixed part in assoc response */
- if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) {
- elems->prof = NULL;
- elems->sta_prof_len = 0;
- return;
- }
-
- /*
- * Skip the capability information and the status code that are expected
- * as part of the station profile in association response frames. Note
- * the -1 is because the 'sta_info_len' is accounted to as part of the
- * per-STA profile, but not part of the 'u8 variable[]' portion.
- */
- sub.start = prof->variable + prof->sta_info_len - 1 + 4;
- end = (const u8 *)prof + elems->sta_prof_len;
- sub.len = end - sub.start;
-
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- sub.start, sub.len);
- _ieee802_11_parse_elems_full(&sub, elems, non_inherit);
-}
-
-struct ieee802_11_elems *
-ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
-{
- struct ieee802_11_elems *elems;
- const struct element *non_inherit = NULL;
- u8 *nontransmitted_profile;
- int nontransmitted_profile_len = 0;
- size_t scratch_len = 3 * params->len;
-
- elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC);
- if (!elems)
- return NULL;
- elems->ie_start = params->start;
- elems->total_len = params->len;
- elems->scratch_len = scratch_len;
- elems->scratch_pos = elems->scratch;
-
- nontransmitted_profile = elems->scratch_pos;
- nontransmitted_profile_len =
- ieee802_11_find_bssid_profile(params->start, params->len,
- elems, params->bss,
- nontransmitted_profile);
- elems->scratch_pos += nontransmitted_profile_len;
- elems->scratch_len -= nontransmitted_profile_len;
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- nontransmitted_profile,
- nontransmitted_profile_len);
-
- elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit);
-
- /* Override with nontransmitted profile, if found */
- if (nontransmitted_profile_len) {
- struct ieee80211_elems_parse_params sub = {
- .start = nontransmitted_profile,
- .len = nontransmitted_profile_len,
- .action = params->action,
- .link_id = params->link_id,
- };
-
- _ieee802_11_parse_elems_full(&sub, elems, NULL);
- }
-
- ieee80211_mle_parse_link(elems, params);
-
- if (elems->tim && !elems->parse_error) {
- const struct ieee80211_tim_ie *tim_ie = elems->tim;
-
- elems->dtim_period = tim_ie->dtim_period;
- elems->dtim_count = tim_ie->dtim_count;
- }
-
- /* Override DTIM period and count if needed */
- if (elems->bssid_index &&
- elems->bssid_index_len >=
- offsetofend(struct ieee80211_bssid_index, dtim_period))
- elems->dtim_period = elems->bssid_index->dtim_period;
-
- if (elems->bssid_index &&
- elems->bssid_index_len >=
- offsetofend(struct ieee80211_bssid_index, dtim_count))
- elems->dtim_count = elems->bssid_index->dtim_count;
-
- return elems;
-}
-EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full);
-
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params
*qparam, int ac)
@@ -1938,37 +1173,34 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
}
}
-u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end)
+static int ieee80211_put_s1g_cap(struct sk_buff *skb,
+ struct ieee80211_sta_s1g_cap *s1g_cap)
{
- if ((end - pos) < 5)
- return pos;
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_s1g_cap))
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- *pos++ = 1 + sizeof(cap);
- *pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
- memcpy(pos, &cap, sizeof(cap));
+ skb_put_u8(skb, WLAN_EID_S1G_CAPABILITIES);
+ skb_put_u8(skb, sizeof(struct ieee80211_s1g_cap));
- return pos + 2;
+ skb_put_data(skb, &s1g_cap->cap, sizeof(s1g_cap->cap));
+ skb_put_data(skb, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs));
+
+ return 0;
}
-static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
- u8 *buffer, size_t buffer_len,
- const u8 *ie, size_t ie_len,
- enum nl80211_band band,
- u32 rate_mask,
- struct cfg80211_chan_def *chandef,
- size_t *offset, u32 flags)
+static int ieee80211_put_preq_ies_band(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const u8 *ie, size_t ie_len,
+ size_t *offset,
+ enum nl80211_band band,
+ u32 rate_mask,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
- u8 *pos = buffer, *end = buffer + buffer_len;
+ int i, err;
size_t noffset;
- int supp_rates_len, i;
- u8 rates[32];
- int num_rates;
- int ext_rates_len;
u32 rate_flags;
bool have_80mhz = false;
@@ -1981,32 +1213,13 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
rate_flags = ieee80211_chandef_rate_flags(chandef);
/* For direct scan add S1G IE and consider its override bits */
- if (band == NL80211_BAND_S1GHZ) {
- if (end - pos < 2 + sizeof(struct ieee80211_s1g_cap))
- goto out_err;
- pos = ieee80211_ie_build_s1g_cap(pos, &sband->s1g_cap);
- goto done;
- }
-
- num_rates = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
- if ((BIT(i) & rate_mask) == 0)
- continue; /* skip rate */
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
-
- rates[num_rates++] =
- (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- }
-
- supp_rates_len = min_t(int, num_rates, 8);
+ if (band == NL80211_BAND_S1GHZ)
+ return ieee80211_put_s1g_cap(skb, &sband->s1g_cap);
- if (end - pos < 2 + supp_rates_len)
- goto out_err;
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = supp_rates_len;
- memcpy(pos, rates, supp_rates_len);
- pos += supp_rates_len;
+ err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags,
+ ~rate_mask, WLAN_EID_SUPP_RATES);
+ if (err)
+ return err;
/* insert "request information" if in custom IEs */
if (ie && ie_len) {
@@ -2019,34 +1232,28 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
before_extrates,
ARRAY_SIZE(before_extrates),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
- ext_rates_len = num_rates - supp_rates_len;
- if (ext_rates_len > 0) {
- if (end - pos < 2 + ext_rates_len)
- goto out_err;
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = ext_rates_len;
- memcpy(pos, rates + supp_rates_len, ext_rates_len);
- pos += ext_rates_len;
- }
+ err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags,
+ ~rate_mask, WLAN_EID_EXT_SUPP_RATES);
+ if (err)
+ return err;
if (chandef->chan && sband->band == NL80211_BAND_2GHZ) {
- if (end - pos < 3)
- goto out_err;
- *pos++ = WLAN_EID_DS_PARAMS;
- *pos++ = 1;
- *pos++ = ieee80211_frequency_to_channel(
- chandef->chan->center_freq);
+ if (skb_tailroom(skb) < 3)
+ return -ENOBUFS;
+ skb_put_u8(skb, WLAN_EID_DS_PARAMS);
+ skb_put_u8(skb, 1);
+ skb_put_u8(skb,
+ ieee80211_frequency_to_channel(chandef->chan->center_freq));
}
if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT)
- goto done;
+ return 0;
/* insert custom IEs that go before HT */
if (ie && ie_len) {
@@ -2061,18 +1268,21 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_ht, ARRAY_SIZE(before_ht),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
if (sband->ht_cap.ht_supported) {
- if (end - pos < 2 + sizeof(struct ieee80211_ht_cap))
- goto out_err;
- pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
- sband->ht_cap.cap);
+ u8 *pos;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
+ return -ENOBUFS;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_cap));
+ ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
+ sband->ht_cap.cap);
}
/* insert custom IEs that go before VHT */
@@ -2093,10 +1303,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
@@ -2111,10 +1320,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
}
if (sband->vht_cap.vht_supported && have_80mhz) {
- if (end - pos < 2 + sizeof(struct ieee80211_vht_cap))
- goto out_err;
- pos = ieee80211_ie_build_vht_cap(pos, &sband->vht_cap,
- sband->vht_cap.cap);
+ u8 *pos;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
+ return -ENOBUFS;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap));
+ ieee80211_ie_build_vht_cap(pos, &sband->vht_cap,
+ sband->vht_cap.cap);
}
/* insert custom IEs that go before HE */
@@ -2131,107 +1344,128 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
noffset = ieee80211_ie_split(ie, ie_len,
before_he, ARRAY_SIZE(before_he),
*offset);
- if (end - pos < noffset - *offset)
- goto out_err;
- memcpy(pos, ie + *offset, noffset - *offset);
- pos += noffset - *offset;
+ if (skb_tailroom(skb) < noffset - *offset)
+ return -ENOBUFS;
+ skb_put_data(skb, ie + *offset, noffset - *offset);
*offset = noffset;
}
- he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
- if (he_cap &&
- cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE)) {
- pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end);
- if (!pos)
- goto out_err;
+ err = ieee80211_put_he_cap(skb, sdata, sband, NULL);
+ if (err)
+ return err;
}
- eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
-
- if (eht_cap &&
- cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE |
IEEE80211_CHAN_NO_EHT)) {
- pos = ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, end,
- sdata->vif.type == NL80211_IFTYPE_AP);
- if (!pos)
- goto out_err;
+ err = ieee80211_put_eht_cap(skb, sdata, sband, NULL);
+ if (err)
+ return err;
}
- if (cfg80211_any_usable_channels(local->hw.wiphy,
- BIT(NL80211_BAND_6GHZ),
- IEEE80211_CHAN_NO_HE)) {
- struct ieee80211_supported_band *sband6;
-
- sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
- he_cap = ieee80211_get_he_iftype_cap_vif(sband6, &sdata->vif);
-
- if (he_cap) {
- enum nl80211_iftype iftype =
- ieee80211_vif_type_p2p(&sdata->vif);
- __le16 cap = ieee80211_get_he_6ghz_capa(sband6, iftype);
-
- pos = ieee80211_write_he_6ghz_cap(pos, cap, end);
- }
- }
+ err = ieee80211_put_he_6ghz_cap(skb, sdata, IEEE80211_SMPS_OFF);
+ if (err)
+ return err;
/*
* If adding more here, adjust code in main.c
* that calculates local->scan_ies_len.
*/
- return pos - buffer;
- out_err:
- WARN_ONCE(1, "not enough space for preq IEs\n");
- done:
- return pos - buffer;
+ return 0;
}
-int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
- size_t buffer_len,
- struct ieee80211_scan_ies *ie_desc,
- const u8 *ie, size_t ie_len,
- u8 bands_used, u32 *rate_masks,
- struct cfg80211_chan_def *chandef,
- u32 flags)
+static int ieee80211_put_preq_ies(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
{
- size_t pos = 0, old_pos = 0, custom_ie_offset = 0;
- int i;
+ size_t custom_ie_offset = 0;
+ int i, err;
memset(ie_desc, 0, sizeof(*ie_desc));
for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (bands_used & BIT(i)) {
- pos += ieee80211_build_preq_ies_band(sdata,
- buffer + pos,
- buffer_len - pos,
- ie, ie_len, i,
- rate_masks[i],
- chandef,
- &custom_ie_offset,
- flags);
- ie_desc->ies[i] = buffer + old_pos;
- ie_desc->len[i] = pos - old_pos;
- old_pos = pos;
+ ie_desc->ies[i] = skb_tail_pointer(skb);
+ err = ieee80211_put_preq_ies_band(skb, sdata,
+ ie, ie_len,
+ &custom_ie_offset,
+ i, rate_masks[i],
+ chandef, flags);
+ if (err)
+ return err;
+ ie_desc->len[i] = skb_tail_pointer(skb) -
+ ie_desc->ies[i];
}
}
/* add any remaining custom IEs */
if (ie && ie_len) {
- if (WARN_ONCE(buffer_len - pos < ie_len - custom_ie_offset,
+ if (WARN_ONCE(skb_tailroom(skb) < ie_len - custom_ie_offset,
"not enough space for preq custom IEs\n"))
- return pos;
- memcpy(buffer + pos, ie + custom_ie_offset,
- ie_len - custom_ie_offset);
- ie_desc->common_ies = buffer + pos;
- ie_desc->common_ie_len = ie_len - custom_ie_offset;
- pos += ie_len - custom_ie_offset;
+ return -ENOBUFS;
+ ie_desc->common_ies = skb_tail_pointer(skb);
+ skb_put_data(skb, ie + custom_ie_offset,
+ ie_len - custom_ie_offset);
+ ie_desc->common_ie_len = skb_tail_pointer(skb) -
+ ie_desc->common_ies;
}
- return pos;
+ return 0;
};
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
+ size_t buffer_len,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
+ struct cfg80211_chan_def *chandef,
+ u32 flags)
+{
+ struct sk_buff *skb = alloc_skb(buffer_len, GFP_KERNEL);
+ uintptr_t offs;
+ int ret, i;
+ u8 *start;
+
+ if (!skb)
+ return -ENOMEM;
+
+ start = skb_tail_pointer(skb);
+ memset(start, 0, skb_tailroom(skb));
+ ret = ieee80211_put_preq_ies(skb, sdata, ie_desc, ie, ie_len,
+ bands_used, rate_masks, chandef,
+ flags);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (skb->len > buffer_len) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ memcpy(buffer, start, skb->len);
+
+ /* adjust ie_desc for copy */
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ offs = ie_desc->ies[i] - start;
+ ie_desc->ies[i] = buffer + offs;
+ }
+ offs = ie_desc->common_ies - start;
+ ie_desc->common_ies = buffer + offs;
+
+ ret = skb->len;
+out:
+ consume_skb(skb);
+ return ret;
+}
+
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
const u8 *src, const u8 *dst,
u32 ratemask,
@@ -2244,7 +1478,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
- int ies_len;
u32 rate_masks[NUM_NL80211_BANDS] = {};
struct ieee80211_scan_ies dummy_ie_desc;
@@ -2253,7 +1486,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
* in order to maximize the chance that we get a response. Some
* badly-behaved APs don't respond when this parameter is included.
*/
- chandef.width = sdata->vif.bss_conf.chandef.width;
+ chandef.width = sdata->vif.bss_conf.chanreq.oper.width;
if (flags & IEEE80211_PROBE_FLAG_DIRECTED)
chandef.chan = NULL;
else
@@ -2265,11 +1498,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
return NULL;
rate_masks[chan->band] = ratemask;
- ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb),
- skb_tailroom(skb), &dummy_ie_desc,
- ie, ie_len, BIT(chan->band),
- rate_masks, &chandef, flags);
- skb_put(skb, ies_len);
+ ieee80211_put_preq_ies(skb, sdata, &dummy_ie_desc,
+ ie, ie_len, BIT(chan->band),
+ rate_masks, &chandef, flags);
if (dst) {
mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -2295,7 +1526,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sband))
return 1;
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper);
num_rates = sband->n_bitrates;
supp_rates = 0;
@@ -2335,6 +1567,10 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
void ieee80211_stop_device(struct ieee80211_local *local)
{
+ local_bh_disable();
+ ieee80211_handle_queued_frames(local);
+ local_bh_enable();
+
ieee80211_led_radio(local, false);
ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
@@ -2416,9 +1652,6 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
lockdep_assert_wiphy(local->hw.wiphy);
- if (!local->use_chanctx)
- return;
-
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
if (conf) {
@@ -2612,7 +1845,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add interfaces */
sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
- if (sdata) {
+ if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) {
/* in HW restart it exists already */
WARN_ON(local->resuming);
res = drv_add_interface(local, sdata);
@@ -2648,20 +1881,20 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
/* add channel contexts */
- if (local->use_chanctx) {
- list_for_each_entry(ctx, &local->chanctx_list, list)
- if (ctx->replace_state !=
- IEEE80211_CHANCTX_REPLACES_OTHER)
- WARN_ON(drv_add_chanctx(local, ctx));
-
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (sdata && ieee80211_sdata_running(sdata))
- ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
- }
+ list_for_each_entry(ctx, &local->chanctx_list, list)
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ WARN_ON(drv_add_chanctx(local, ctx));
+
+ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
+ if (sdata && ieee80211_sdata_running(sdata))
+ ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
/* reconfigure hardware */
- ieee80211_hw_config(local, ~0);
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL |
+ IEEE80211_CONF_CHANGE_MONITOR |
+ IEEE80211_CONF_CHANGE_PS |
+ IEEE80211_CONF_CHANGE_RETRY_LIMITS |
+ IEEE80211_CONF_CHANGE_IDLE);
ieee80211_configure_filter(local);
@@ -2703,11 +1936,12 @@ int ieee80211_reconfig(struct ieee80211_local *local)
old);
}
+ sdata->restart_active_links = active_links;
+
for (link_id = 0;
link_id < ARRAY_SIZE(sdata->vif.link_conf);
link_id++) {
- if (ieee80211_vif_is_mld(&sdata->vif) &&
- !(sdata->vif.active_links & BIT(link_id)))
+ if (!ieee80211_vif_link_active(&sdata->vif, link_id))
continue;
link = sdata_dereference(sdata->link[link_id], sdata);
@@ -2756,9 +1990,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
sdata->vif.bss_conf.protected_keep_alive)
changed |= BSS_CHANGED_KEEP_ALIVE;
- if (sdata->vif.bss_conf.eht_puncturing)
- changed |= BSS_CHANGED_EHT_PUNCTURING;
-
ieee80211_bss_info_change_notify(sdata,
changed);
} else if (!WARN_ON(!link)) {
@@ -2834,9 +2065,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(1);
break;
}
-
- if (active_links)
- ieee80211_set_active_links(&sdata->vif, active_links);
}
ieee80211_recalc_ps(local);
@@ -2877,6 +2105,20 @@ int ieee80211_reconfig(struct ieee80211_local *local)
list_for_each_entry(sdata, &local->interfaces, list)
ieee80211_reenable_keys(sdata);
+ /* re-enable multi-link for client interfaces */
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->restart_active_links)
+ ieee80211_set_active_links(&sdata->vif,
+ sdata->restart_active_links);
+ /*
+ * If a link switch was scheduled before the restart, and ran
+ * before reconfig, it will do nothing, so re-schedule.
+ */
+ if (sdata->desired_active_links)
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->activate_links_work);
+ }
+
/* Reconfigure sched scan if it was interrupted by FW restart */
sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
lockdep_is_held(&local->hw.wiphy->mtx));
@@ -3109,21 +2351,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
return pos;
}
-u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap)
-{
- *pos++ = WLAN_EID_S1G_CAPABILITIES;
- *pos++ = sizeof(struct ieee80211_s1g_cap);
- memset(pos, 0, sizeof(struct ieee80211_s1g_cap));
-
- memcpy(pos, &s1g_cap->cap, sizeof(s1g_cap->cap));
- pos += sizeof(s1g_cap->cap);
-
- memcpy(pos, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs));
- pos += sizeof(s1g_cap->nss_mcs);
-
- return pos;
-}
-
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 cap)
{
@@ -3180,7 +2407,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos;
}
-u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+/* this may return more than ieee80211_put_he_6ghz_cap() will need */
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata)
{
const struct ieee80211_sta_he_cap *he_cap;
struct ieee80211_supported_band *sband;
@@ -3190,7 +2418,7 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
if (!sband)
return 0;
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
if (!he_cap)
return 0;
@@ -3201,38 +2429,75 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
he_cap->he_cap_elem.phy_cap_info);
}
-u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
+static void
+ieee80211_get_adjusted_he_cap(const struct ieee80211_conn_settings *conn,
const struct ieee80211_sta_he_cap *he_cap,
- u8 *end)
+ struct ieee80211_he_cap_elem *elem)
{
- struct ieee80211_he_cap_elem elem;
- u8 n;
- u8 ie_len;
- u8 *orig_pos = pos;
+ u8 ru_limit, max_ru;
- /* Make sure we have place for the IE */
- /*
- * TODO: the 1 added is because this temporarily is under the EXTENSION
- * IE. Get rid of it when it moves.
- */
- if (!he_cap)
- return orig_pos;
+ *elem = he_cap->he_cap_elem;
- /* modify on stack first to calculate 'n' and 'ie_len' correctly */
- elem = he_cap->he_cap_elem;
+ switch (conn->bw_limit) {
+ case IEEE80211_CONN_BW_LIMIT_20:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242;
+ break;
+ case IEEE80211_CONN_BW_LIMIT_40:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484;
+ break;
+ case IEEE80211_CONN_BW_LIMIT_80:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996;
+ break;
+ default:
+ ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996;
+ break;
+ }
- if (disable_flags & IEEE80211_CONN_DISABLE_40MHZ)
- elem.phy_cap_info[0] &=
+ max_ru = elem->phy_cap_info[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK;
+ max_ru = min(max_ru, ru_limit);
+ elem->phy_cap_info[8] &= ~IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK;
+ elem->phy_cap_info[8] |= max_ru;
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) {
+ elem->phy_cap_info[0] &=
~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);
+ elem->phy_cap_info[9] &=
+ ~IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM;
+ }
- if (disable_flags & IEEE80211_CONN_DISABLE_160MHZ)
- elem.phy_cap_info[0] &=
- ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) {
+ elem->phy_cap_info[0] &=
+ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G);
+ elem->phy_cap_info[5] &=
+ ~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK;
+ elem->phy_cap_info[7] &=
+ ~(IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ |
+ IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ);
+ }
+}
- if (disable_flags & IEEE80211_CONN_DISABLE_80P80MHZ)
- elem.phy_cap_info[0] &=
- ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+int ieee80211_put_he_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct ieee80211_he_cap_elem elem;
+ u8 *len;
+ u8 n;
+ u8 ie_len;
+
+ if (!conn)
+ conn = &ieee80211_conn_settings_unlimited;
+
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ if (!he_cap)
+ return 0;
+
+ /* modify on stack first to calculate 'n' and 'ie_len' correctly */
+ ieee80211_get_adjusted_he_cap(conn, he_cap, &elem);
n = ieee80211_he_mcs_nss_size(&elem);
ie_len = 2 + 1 +
@@ -3240,19 +2505,17 @@ u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
he_cap->he_cap_elem.phy_cap_info);
- if ((end - pos) < ie_len)
- return orig_pos;
+ if (skb_tailroom(skb) < ie_len)
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- pos++; /* We'll set the size later below */
- *pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ len = skb_put(skb, 1); /* We'll set the size later below */
+ skb_put_u8(skb, WLAN_EID_EXT_HE_CAPABILITY);
/* Fixed data */
- memcpy(pos, &elem, sizeof(elem));
- pos += sizeof(elem);
+ skb_put_data(skb, &elem, sizeof(elem));
- memcpy(pos, &he_cap->he_mcs_nss_supp, n);
- pos += n;
+ skb_put_data(skb, &he_cap->he_mcs_nss_supp, n);
/* Check if PPE Threshold should be present */
if ((he_cap->he_cap_elem.phy_cap_info[6] &
@@ -3276,41 +2539,39 @@ u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
n = DIV_ROUND_UP(n, 8);
/* Copy PPE Thresholds */
- memcpy(pos, &he_cap->ppe_thres, n);
- pos += n;
+ skb_put_data(skb, &he_cap->ppe_thres, n);
end:
- orig_pos[1] = (pos - orig_pos) - 2;
- return pos;
+ *len = skb_tail_pointer(skb) - len - 1;
+ return 0;
}
-void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
- enum ieee80211_smps_mode smps_mode,
- struct sk_buff *skb)
+int ieee80211_put_he_6ghz_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode)
{
struct ieee80211_supported_band *sband;
const struct ieee80211_sband_iftype_data *iftd;
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
- u8 *pos;
- u16 cap;
+ __le16 cap;
if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy,
BIT(NL80211_BAND_6GHZ),
IEEE80211_CHAN_NO_HE))
- return;
+ return 0;
sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ];
iftd = ieee80211_get_sband_iftype_data(sband, iftype);
if (!iftd)
- return;
+ return 0;
/* Check for device HE 6 GHz capability before adding element */
if (!iftd->he_6ghz_capa.capa)
- return;
+ return 0;
- cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
- cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
+ cap = iftd->he_6ghz_capa.capa;
+ cap &= cpu_to_le16(~IEEE80211_HE_6GHZ_CAP_SM_PS);
switch (smps_mode) {
case IEEE80211_SMPS_AUTOMATIC:
@@ -3318,22 +2579,27 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
WARN_ON(1);
fallthrough;
case IEEE80211_SMPS_OFF:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
case IEEE80211_SMPS_STATIC:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
case IEEE80211_SMPS_DYNAMIC:
- cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
- IEEE80211_HE_6GHZ_CAP_SM_PS);
+ cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
break;
}
- pos = skb_put(skb, 2 + 1 + sizeof(cap));
- ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap),
- pos + 2 + 1 + sizeof(cap));
+ if (skb_tailroom(skb) < 2 + 1 + sizeof(cap))
+ return -ENOBUFS;
+
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ skb_put_u8(skb, 1 + sizeof(cap));
+ skb_put_u8(skb, WLAN_EID_EXT_HE_6GHZ_CAPA);
+ skb_put_data(skb, &cap, sizeof(cap));
+ return 0;
}
u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
@@ -3785,7 +3051,6 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
}
void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
- bool support_160, bool support_320,
struct cfg80211_chan_def *chandef)
{
chandef->center_freq1 =
@@ -3804,90 +3069,38 @@ void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
chandef->width = NL80211_CHAN_WIDTH_80;
break;
case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ:
- if (support_160) {
- chandef->width = NL80211_CHAN_WIDTH_160;
- chandef->center_freq1 =
- ieee80211_channel_to_frequency(info->ccfs1,
- chandef->chan->band);
- } else {
- chandef->width = NL80211_CHAN_WIDTH_80;
- }
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
break;
case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ:
- if (support_320) {
- chandef->width = NL80211_CHAN_WIDTH_320;
- chandef->center_freq1 =
- ieee80211_channel_to_frequency(info->ccfs1,
- chandef->chan->band);
- } else if (support_160) {
- chandef->width = NL80211_CHAN_WIDTH_160;
- } else {
- chandef->width = NL80211_CHAN_WIDTH_80;
-
- if (chandef->center_freq1 > chandef->chan->center_freq)
- chandef->center_freq1 -= 40;
- else
- chandef->center_freq1 += 40;
- }
+ chandef->width = NL80211_CHAN_WIDTH_320;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
break;
}
}
-bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local,
const struct ieee80211_he_operation *he_oper,
const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
- const struct ieee80211_sta_he_cap *he_cap;
- const struct ieee80211_sta_eht_cap *eht_cap;
struct cfg80211_chan_def he_chandef = *chandef;
const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- bool support_80_80, support_160, support_320;
- u8 he_phy_cap, eht_phy_cap;
u32 freq;
if (chandef->chan->band != NL80211_BAND_6GHZ)
return true;
- sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
-
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
- if (!he_cap) {
- sdata_info(sdata, "Missing iftype sband data/HE cap");
+ if (!he_oper)
return false;
- }
-
- he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0];
- support_160 =
- he_phy_cap &
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
- support_80_80 =
- he_phy_cap &
- IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
-
- if (!he_oper) {
- sdata_info(sdata,
- "HE is not advertised on (on %d MHz), expect issues\n",
- chandef->chan->center_freq);
- return false;
- }
-
- eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
- if (!eht_cap)
- eht_oper = NULL;
he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
-
- if (!he_6ghz_oper) {
- sdata_info(sdata,
- "HE 6GHz operation missing (on %d MHz), expect issues\n",
- chandef->chan->center_freq);
+ if (!he_6ghz_oper)
return false;
- }
/*
* The EHT operation IE does not contain the primary channel so the
@@ -3896,20 +3109,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
*/
freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary,
NL80211_BAND_6GHZ);
- he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
+ he_chandef.chan = ieee80211_get_channel(local->hw.wiphy, freq);
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- bss_conf->power_type = IEEE80211_REG_LPI_AP;
- break;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- bss_conf->power_type = IEEE80211_REG_SP_AP;
- break;
- default:
- bss_conf->power_type = IEEE80211_REG_UNSET_AP;
- break;
- }
+ if (!he_chandef.chan)
+ return false;
if (!eht_oper ||
!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
@@ -3928,13 +3131,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
he_chandef.width = NL80211_CHAN_WIDTH_80;
if (!he_6ghz_oper->ccfs1)
break;
- if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
- if (support_160)
- he_chandef.width = NL80211_CHAN_WIDTH_160;
- } else {
- if (support_80_80)
- he_chandef.width = NL80211_CHAN_WIDTH_80P80;
- }
+ if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8)
+ he_chandef.width = NL80211_CHAN_WIDTH_160;
+ else
+ he_chandef.width = NL80211_CHAN_WIDTH_80P80;
break;
}
@@ -3946,30 +3146,19 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
he_chandef.center_freq1 =
ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
NL80211_BAND_6GHZ);
- if (support_80_80 || support_160)
- he_chandef.center_freq2 =
- ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
- NL80211_BAND_6GHZ);
+ he_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+ NL80211_BAND_6GHZ);
}
} else {
- eht_phy_cap = eht_cap->eht_cap_elem.phy_cap_info[0];
- support_320 =
- eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
-
ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
- support_160, support_320,
&he_chandef);
+ he_chandef.punctured =
+ ieee80211_eht_oper_dis_subchan_bitmap(eht_oper);
}
- if (!cfg80211_chandef_valid(&he_chandef)) {
- sdata_info(sdata,
- "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n",
- he_chandef.chan ? he_chandef.chan->center_freq : 0,
- he_chandef.width,
- he_chandef.center_freq1,
- he_chandef.center_freq2);
+ if (!cfg80211_chandef_valid(&he_chandef))
return false;
- }
*chandef = he_chandef;
@@ -4012,121 +3201,62 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
return true;
}
-int ieee80211_parse_bitrates(enum nl80211_chan_width width,
- const struct ieee80211_supported_band *sband,
- const u8 *srates, int srates_len, u32 *rates)
+int ieee80211_put_srates_elem(struct sk_buff *skb,
+ const struct ieee80211_supported_band *sband,
+ u32 basic_rates, u32 rate_flags, u32 masked_rates,
+ u8 element_id)
{
- u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
- struct ieee80211_rate *br;
- int brate, rate, i, j, count = 0;
-
- *rates = 0;
-
- for (i = 0; i < srates_len; i++) {
- rate = srates[i] & 0x7f;
-
- for (j = 0; j < sband->n_bitrates; j++) {
- br = &sband->bitrates[j];
- if ((rate_flags & br->flags) != rate_flags)
- continue;
+ u8 i, rates, skip;
- brate = DIV_ROUND_UP(br->bitrate, 5);
- if (brate == rate) {
- *rates |= BIT(j);
- count++;
- break;
- }
- }
- }
- return count;
-}
-
-int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- int rate;
- u8 i, rates, *pos;
- u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- u32 rate_flags;
-
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
- sband = local->hw.wiphy->bands[band];
rates = 0;
for (i = 0; i < sband->n_bitrates; i++) {
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
continue;
+ if (masked_rates & BIT(i))
+ continue;
rates++;
}
- if (rates > 8)
- rates = 8;
-
- if (skb_tailroom(skb) < rates + 2)
- return -ENOMEM;
- pos = skb_put(skb, rates + 2);
- *pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = rates;
- for (i = 0; i < rates; i++) {
- u8 basic = 0;
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
-
- if (need_basic && basic_rates & BIT(i))
- basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = basic | (u8) rate;
+ if (element_id == WLAN_EID_SUPP_RATES) {
+ rates = min_t(u8, rates, 8);
+ skip = 0;
+ } else {
+ skip = 8;
+ if (rates <= skip)
+ return 0;
+ rates -= skip;
}
- return 0;
-}
+ if (skb_tailroom(skb) < rates + 2)
+ return -ENOBUFS;
-int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic,
- enum nl80211_band band)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- int rate;
- u8 i, exrates, *pos;
- u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- u32 rate_flags;
+ skb_put_u8(skb, element_id);
+ skb_put_u8(skb, rates);
- rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ for (i = 0; i < sband->n_bitrates && rates; i++) {
+ int rate;
+ u8 basic;
- sband = local->hw.wiphy->bands[band];
- exrates = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
continue;
- exrates++;
- }
+ if (masked_rates & BIT(i))
+ continue;
- if (exrates > 8)
- exrates -= 8;
- else
- exrates = 0;
+ if (skip > 0) {
+ skip--;
+ continue;
+ }
- if (skb_tailroom(skb) < exrates + 2)
- return -ENOMEM;
+ basic = basic_rates & BIT(i) ? 0x80 : 0;
- if (exrates) {
- pos = skb_put(skb, exrates + 2);
- *pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = exrates;
- for (i = 8; i < sband->n_bitrates; i++) {
- u8 basic = 0;
- if ((rate_flags & sband->bitrates[i].flags)
- != rate_flags)
- continue;
- if (need_basic && basic_rates & BIT(i))
- basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
- *pos++ = basic | (u8) rate;
- }
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
+ skb_put_u8(skb, basic | (u8)rate);
+ rates--;
}
+
+ WARN(rates > 0, "rates confused: rates:%d, element:%d\n",
+ rates, element_id);
+
return 0;
}
@@ -4338,7 +3468,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chandef;
+ chandef = sdata->vif.bss_conf.chanreq.oper;
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev,
&chandef,
@@ -4386,78 +3516,92 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_radar_detected);
-ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
+void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c,
+ struct ieee80211_conn_settings *conn)
{
- ieee80211_conn_flags_t ret;
- int tmp;
+ enum nl80211_chan_width new_primary_width;
+ struct ieee80211_conn_settings _ignored = {};
+
+ /* allow passing NULL if caller doesn't care */
+ if (!conn)
+ conn = &_ignored;
+
+again:
+ /* no-HT indicates nothing to do */
+ new_primary_width = NL80211_CHAN_WIDTH_20_NOHT;
switch (c->width) {
+ default:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ WARN_ON_ONCE(1);
+ fallthrough;
case NL80211_CHAN_WIDTH_20:
c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ c->punctured = 0;
break;
case NL80211_CHAN_WIDTH_40:
c->width = NL80211_CHAN_WIDTH_20;
c->center_freq1 = c->chan->center_freq;
- ret = IEEE80211_CONN_DISABLE_40MHZ |
- IEEE80211_CONN_DISABLE_VHT;
+ if (conn->mode == IEEE80211_CONN_MODE_VHT)
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ c->punctured = 0;
break;
case NL80211_CHAN_WIDTH_80:
- tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
- c->width = NL80211_CHAN_WIDTH_40;
- ret = IEEE80211_CONN_DISABLE_VHT;
+ new_primary_width = NL80211_CHAN_WIDTH_40;
+ if (conn->mode == IEEE80211_CONN_MODE_VHT)
+ conn->mode = IEEE80211_CONN_MODE_HT;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_40;
break;
case NL80211_CHAN_WIDTH_80P80:
c->center_freq2 = 0;
c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
break;
case NL80211_CHAN_WIDTH_160:
- /* n_P20 */
- tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P80 */
- tmp /= 4;
- c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
- c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_CONN_DISABLE_80P80MHZ |
- IEEE80211_CONN_DISABLE_160MHZ;
+ new_primary_width = NL80211_CHAN_WIDTH_80;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80;
break;
case NL80211_CHAN_WIDTH_320:
- /* n_P20 */
- tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
- /* n_P160 */
- tmp /= 8;
- c->center_freq1 = c->center_freq1 - 80 + 160 * tmp;
- c->width = NL80211_CHAN_WIDTH_160;
- ret = IEEE80211_CONN_DISABLE_320MHZ;
- break;
- default:
- case NL80211_CHAN_WIDTH_20_NOHT:
- WARN_ON_ONCE(1);
- c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ new_primary_width = NL80211_CHAN_WIDTH_160;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160;
break;
case NL80211_CHAN_WIDTH_1:
case NL80211_CHAN_WIDTH_2:
case NL80211_CHAN_WIDTH_4:
case NL80211_CHAN_WIDTH_8:
case NL80211_CHAN_WIDTH_16:
+ WARN_ON_ONCE(1);
+ /* keep c->width */
+ conn->mode = IEEE80211_CONN_MODE_S1G;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
+ break;
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
WARN_ON_ONCE(1);
/* keep c->width */
- ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
+ conn->mode = IEEE80211_CONN_MODE_LEGACY;
+ conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20;
break;
}
- WARN_ON_ONCE(!cfg80211_chandef_valid(c));
+ if (new_primary_width != NL80211_CHAN_WIDTH_20_NOHT) {
+ c->center_freq1 = cfg80211_chandef_primary(c, new_primary_width,
+ &c->punctured);
+ c->width = new_primary_width;
+ }
- return ret;
+ /*
+ * With an 80 MHz channel, we might have the puncturing in the primary
+ * 40 Mhz channel, but that's not valid when downgraded to 40 MHz width.
+ * In that case, downgrade again.
+ */
+ if (!cfg80211_chandef_valid(c) && c->punctured)
+ goto again;
+
+ WARN_ON_ONCE(!cfg80211_chandef_valid(c));
}
/*
@@ -4773,7 +3917,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list)
if (link->reserved_radar_required)
- radar_detect |= BIT(link->reserved_chandef.width);
+ radar_detect |= BIT(link->reserved.oper.width);
/*
* An in-place reservation context should not have any assigned vifs
@@ -4787,7 +3931,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
continue;
radar_detect |=
- BIT(link->conf->chandef.width);
+ BIT(link->conf->chanreq.oper.width);
}
return radar_detect;
@@ -5037,7 +4181,8 @@ u16 ieee80211_encode_usf(int listen_interval)
return (u16) listen_interval;
}
-u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+/* this may return more than ieee80211_put_eht_cap() will need */
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata)
{
const struct ieee80211_sta_he_cap *he_cap;
const struct ieee80211_sta_eht_cap *eht_cap;
@@ -5049,13 +4194,12 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
if (!sband)
return 0;
- he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
- eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
+ he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
if (!he_cap || !eht_cap)
return 0;
- is_ap = iftype == NL80211_IFTYPE_AP ||
- iftype == NL80211_IFTYPE_P2P_GO;
+ is_ap = sdata->vif.type == NL80211_IFTYPE_AP;
n = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
&eht_cap->eht_cap_elem,
@@ -5067,45 +4211,134 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
return 0;
}
-u8 *ieee80211_ie_build_eht_cap(u8 *pos,
- const struct ieee80211_sta_he_cap *he_cap,
- const struct ieee80211_sta_eht_cap *eht_cap,
- u8 *end,
- bool for_ap)
+int ieee80211_put_eht_cap(struct sk_buff *skb,
+ struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_supported_band *sband,
+ const struct ieee80211_conn_settings *conn)
{
+ const struct ieee80211_sta_he_cap *he_cap =
+ ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif);
+ const struct ieee80211_sta_eht_cap *eht_cap =
+ ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif);
+ bool for_ap = sdata->vif.type == NL80211_IFTYPE_AP;
+ struct ieee80211_eht_cap_elem_fixed fixed;
+ struct ieee80211_he_cap_elem he;
u8 mcs_nss_len, ppet_len;
+ u8 orig_mcs_nss_len;
u8 ie_len;
- u8 *orig_pos = pos;
+
+ if (!conn)
+ conn = &ieee80211_conn_settings_unlimited;
/* Make sure we have place for the IE */
if (!he_cap || !eht_cap)
- return orig_pos;
+ return 0;
+
+ orig_mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ for_ap);
+
+ ieee80211_get_adjusted_he_cap(conn, he_cap, &he);
+
+ fixed = eht_cap->eht_cap_elem;
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_80)
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ;
- mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
- &eht_cap->eht_cap_elem,
- for_ap);
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) {
+ fixed.phy_cap_info[1] &=
+ ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK;
+ fixed.phy_cap_info[2] &=
+ ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK;
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ;
+ }
+
+ if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320) {
+ fixed.phy_cap_info[0] &=
+ ~IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
+ fixed.phy_cap_info[1] &=
+ ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK;
+ fixed.phy_cap_info[2] &=
+ ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK;
+ fixed.phy_cap_info[6] &=
+ ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ;
+ }
+
+ if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20)
+ fixed.phy_cap_info[0] &=
+ ~IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ;
+
+ mcs_nss_len = ieee80211_eht_mcs_nss_size(&he, &fixed, for_ap);
ppet_len = ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
- eht_cap->eht_cap_elem.phy_cap_info);
+ fixed.phy_cap_info);
ie_len = 2 + 1 + sizeof(eht_cap->eht_cap_elem) + mcs_nss_len + ppet_len;
- if ((end - pos) < ie_len)
- return orig_pos;
+ if (skb_tailroom(skb) < ie_len)
+ return -ENOBUFS;
- *pos++ = WLAN_EID_EXTENSION;
- *pos++ = ie_len - 2;
- *pos++ = WLAN_EID_EXT_EHT_CAPABILITY;
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ skb_put_u8(skb, ie_len - 2);
+ skb_put_u8(skb, WLAN_EID_EXT_EHT_CAPABILITY);
+ skb_put_data(skb, &fixed, sizeof(fixed));
- /* Fixed data */
- memcpy(pos, &eht_cap->eht_cap_elem, sizeof(eht_cap->eht_cap_elem));
- pos += sizeof(eht_cap->eht_cap_elem);
+ if (mcs_nss_len == 4 && orig_mcs_nss_len != 4) {
+ /*
+ * If the (non-AP) STA became 20 MHz only, then convert from
+ * <=80 to 20-MHz-only format, where MCSes are indicated in
+ * the groups 0-7, 8-9, 10-11, 12-13 rather than just 0-9,
+ * 10-11, 12-13. Thus, use 0-9 for 0-7 and 8-9.
+ */
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs11_max_nss);
+ skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss);
+ } else {
+ skb_put_data(skb, &eht_cap->eht_mcs_nss_supp, mcs_nss_len);
+ }
- memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len);
- pos += mcs_nss_len;
+ if (ppet_len)
+ skb_put_data(skb, &eht_cap->eht_ppe_thres, ppet_len);
- if (ppet_len) {
- memcpy(pos, &eht_cap->eht_ppe_thres, ppet_len);
- pos += ppet_len;
- }
+ return 0;
+}
- return pos;
+const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode)
+{
+ static const char * const modes[] = {
+ [IEEE80211_CONN_MODE_S1G] = "S1G",
+ [IEEE80211_CONN_MODE_LEGACY] = "legacy",
+ [IEEE80211_CONN_MODE_HT] = "HT",
+ [IEEE80211_CONN_MODE_VHT] = "VHT",
+ [IEEE80211_CONN_MODE_HE] = "HE",
+ [IEEE80211_CONN_MODE_EHT] = "EHT",
+ };
+
+ if (WARN_ON(mode >= ARRAY_SIZE(modes)))
+ return "<out of range>";
+
+ return modes[mode] ?: "<missing string>";
+}
+
+enum ieee80211_conn_bw_limit
+ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef)
+{
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ return IEEE80211_CONN_BW_LIMIT_20;
+ case NL80211_CHAN_WIDTH_40:
+ return IEEE80211_CONN_BW_LIMIT_40;
+ case NL80211_CHAN_WIDTH_80:
+ return IEEE80211_CONN_BW_LIMIT_80;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ return IEEE80211_CONN_BW_LIMIT_160;
+ case NL80211_CHAN_WIDTH_320:
+ return IEEE80211_CONN_BW_LIMIT_320;
+ default:
+ WARN(1, "unhandled chandef width %d\n", chandef->width);
+ return IEEE80211_CONN_BW_LIMIT_20;
+ }
}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index bc13b14199..642891cafb 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -369,7 +369,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
if (eht_cap->has_eht &&
- link_conf->chandef.chan->band == NL80211_BAND_6GHZ) {
+ link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) {
info = eht_cap->eht_cap_elem.phy_cap_info[0];
if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
@@ -380,7 +380,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
info = he_cap->he_cap_elem.phy_cap_info[0];
- if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ) {
+ if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ) {
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
ret = IEEE80211_STA_RX_BW_40;
else
@@ -515,7 +515,7 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
if (WARN_ON(!link_conf))
bss_width = NL80211_CHAN_WIDTH_20_NOHT;
else
- bss_width = link_conf->chandef.width;
+ bss_width = link_conf->chanreq.oper.width;
rcu_read_unlock();
bw = ieee80211_sta_cap_rx_bw(link_sta);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 94dae7cb6d..047a337970 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -315,7 +315,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
* Calculate AAD for CCMP/GCMP, returning qos_tid since we
* need that in CCMP also for b_0.
*/
-static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
+static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (void *)skb->data;
__le16 mask_fc;
@@ -340,7 +340,14 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
len_a += 6;
if (ieee80211_is_data_qos(hdr->frame_control)) {
- qos_tid = ieee80211_get_tid(hdr);
+ qos_tid = *ieee80211_get_qos_ctl(hdr);
+
+ if (spp_amsdu)
+ qos_tid &= IEEE80211_QOS_CTL_TID_MASK |
+ IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+ else
+ qos_tid &= IEEE80211_QOS_CTL_TID_MASK;
+
mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
len_a += 2;
} else {
@@ -369,10 +376,11 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
return qos_tid;
}
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
+ bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- u8 qos_tid = ccmp_gcmp_aad(skb, aad);
+ u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu);
/* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
* mode authentication are not allowed to collide, yet both are derived
@@ -479,7 +487,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
return 0;
pos += IEEE80211_CCMP_HDR_LEN;
- ccmp_special_blocks(skb, pn, b_0, aad);
+ ccmp_special_blocks(skb, pn, b_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
skb_put(skb, mic_len));
}
@@ -557,7 +566,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
u8 aad[2 * AES_BLOCK_SIZE];
u8 b_0[AES_BLOCK_SIZE];
/* hardware didn't decrypt/verify MIC */
- ccmp_special_blocks(skb, pn, b_0, aad);
+ ccmp_special_blocks(skb, pn, b_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
if (ieee80211_aes_ccm_decrypt(
key->u.ccmp.tfm, b_0, aad,
@@ -581,7 +591,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_CONTINUE;
}
-static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
+static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad,
+ bool spp_amsdu)
{
struct ieee80211_hdr *hdr = (void *)skb->data;
@@ -591,7 +602,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
j_0[14] = 0;
j_0[AES_BLOCK_SIZE - 1] = 0x01;
- ccmp_gcmp_aad(skb, aad);
+ ccmp_gcmp_aad(skb, aad, spp_amsdu);
}
static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id)
@@ -680,7 +691,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
return 0;
pos += IEEE80211_GCMP_HDR_LEN;
- gcmp_special_blocks(skb, pn, j_0, aad);
+ gcmp_special_blocks(skb, pn, j_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
skb_put(skb, IEEE80211_GCMP_MIC_LEN));
}
@@ -753,7 +765,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
u8 aad[2 * AES_BLOCK_SIZE];
u8 j_0[AES_BLOCK_SIZE];
/* hardware didn't decrypt/verify MIC */
- gcmp_special_blocks(skb, pn, j_0, aad);
+ gcmp_special_blocks(skb, pn, j_0, aad,
+ key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU);
if (ieee80211_aes_gcm_decrypt(
key->u.gcmp.tfm, j_0, aad,
@@ -882,7 +895,8 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
info = IEEE80211_SKB_CB(skb);
- if (info->control.hw_key)
+ if (info->control.hw_key &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
return TX_CONTINUE;
if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -898,6 +912,9 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
bip_ipn_set64(mmie->sequence_number, pn64);
+ if (info->control.hw_key)
+ return TX_CONTINUE;
+
bip_aad(skb, aad);
/* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128)
@@ -1027,7 +1044,8 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
info = IEEE80211_SKB_CB(skb);
- if (info->control.hw_key)
+ if (info->control.hw_key &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
return TX_CONTINUE;
if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -1043,6 +1061,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
bip_ipn_set64(mmie->sequence_number, pn64);
+ if (info->control.hw_key)
+ return TX_CONTINUE;
+
bip_aad(skb, aad);
hdr = (struct ieee80211_hdr *)skb->data;
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 9ab7396668..21b7c3b280 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy,
}
phy->symbol_duration = duration;
- phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
- phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+ phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+ phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
}
EXPORT_SYMBOL(ieee802154_configure_durations);
@@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period =
- (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
- wpan_phy->sifs_period =
- (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
+ wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 2a6f1ed763..6fbed5bb5c 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work)
if (res)
goto err_tx;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, skb->len);
ieee802154_xmit_complete(&local->hw, skb, false);
@@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
if (ret)
goto err_wake_netif_queue;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, len);
} else {
local->tx_skb = skb;
queue_work(local->workqueue, &local->sync_tx_work);
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
index 3a5c0e70da..d8d3413a37 100644
--- a/net/mctp/Kconfig
+++ b/net/mctp/Kconfig
@@ -14,6 +14,7 @@ menuconfig MCTP
config MCTP_TEST
bool "MCTP core tests" if !KUNIT_ALL_TESTS
+ select MCTP_FLOWS
depends on MCTP=y && KUNIT=y
default KUNIT_ALL_TESTS
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index f6be58b68c..de52a9191d 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -350,30 +350,102 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
}
-static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
+/* helpers for reading/writing the tag ioc, handling compatibility across the
+ * two versions, and some basic API error checking
+ */
+static int mctp_ioctl_tag_copy_from_user(unsigned long arg,
+ struct mctp_ioc_tag_ctl2 *ctl,
+ bool tagv2)
+{
+ struct mctp_ioc_tag_ctl ctl_compat;
+ unsigned long size;
+ void *ptr;
+ int rc;
+
+ if (tagv2) {
+ size = sizeof(*ctl);
+ ptr = ctl;
+ } else {
+ size = sizeof(ctl_compat);
+ ptr = &ctl_compat;
+ }
+
+ rc = copy_from_user(ptr, (void __user *)arg, size);
+ if (rc)
+ return -EFAULT;
+
+ if (!tagv2) {
+ /* compat, using defaults for new fields */
+ ctl->net = MCTP_INITIAL_DEFAULT_NET;
+ ctl->peer_addr = ctl_compat.peer_addr;
+ ctl->local_addr = MCTP_ADDR_ANY;
+ ctl->flags = ctl_compat.flags;
+ ctl->tag = ctl_compat.tag;
+ }
+
+ if (ctl->flags)
+ return -EINVAL;
+
+ if (ctl->local_addr != MCTP_ADDR_ANY &&
+ ctl->local_addr != MCTP_ADDR_NULL)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int mctp_ioctl_tag_copy_to_user(unsigned long arg,
+ struct mctp_ioc_tag_ctl2 *ctl,
+ bool tagv2)
+{
+ struct mctp_ioc_tag_ctl ctl_compat;
+ unsigned long size;
+ void *ptr;
+ int rc;
+
+ if (tagv2) {
+ ptr = ctl;
+ size = sizeof(*ctl);
+ } else {
+ ctl_compat.peer_addr = ctl->peer_addr;
+ ctl_compat.tag = ctl->tag;
+ ctl_compat.flags = ctl->flags;
+
+ ptr = &ctl_compat;
+ size = sizeof(ctl_compat);
+ }
+
+ rc = copy_to_user((void __user *)arg, ptr, size);
+ if (rc)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mctp_ioctl_alloctag(struct mctp_sock *msk, bool tagv2,
+ unsigned long arg)
{
struct net *net = sock_net(&msk->sk);
struct mctp_sk_key *key = NULL;
- struct mctp_ioc_tag_ctl ctl;
+ struct mctp_ioc_tag_ctl2 ctl;
unsigned long flags;
u8 tag;
+ int rc;
- if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
- return -EFAULT;
+ rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2);
+ if (rc)
+ return rc;
if (ctl.tag)
return -EINVAL;
- if (ctl.flags)
- return -EINVAL;
-
- key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
- true, &tag);
+ key = mctp_alloc_local_tag(msk, ctl.net, MCTP_ADDR_ANY,
+ ctl.peer_addr, true, &tag);
if (IS_ERR(key))
return PTR_ERR(key);
ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
- if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
+ rc = mctp_ioctl_tag_copy_to_user(arg, &ctl, tagv2);
+ if (rc) {
unsigned long fl2;
/* Unwind our key allocation: the keys list lock needs to be
* taken before the individual key locks, and we need a valid
@@ -385,28 +457,27 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED);
mctp_key_unref(key);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
- return -EFAULT;
+ return rc;
}
mctp_key_unref(key);
return 0;
}
-static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
+static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2,
+ unsigned long arg)
{
struct net *net = sock_net(&msk->sk);
- struct mctp_ioc_tag_ctl ctl;
+ struct mctp_ioc_tag_ctl2 ctl;
unsigned long flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
int rc;
u8 tag;
- if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
- return -EFAULT;
-
- if (ctl.flags)
- return -EINVAL;
+ rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2);
+ if (rc)
+ return rc;
/* Must be a local tag, TO set, preallocated */
if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
@@ -422,6 +493,7 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
*/
spin_lock_irqsave(&key->lock, fl2);
if (key->manual_alloc &&
+ ctl.net == key->net &&
ctl.peer_addr == key->peer_addr &&
tag == key->tag) {
__mctp_key_remove(key, net, fl2,
@@ -439,12 +511,17 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+ bool tagv2 = false;
switch (cmd) {
+ case SIOCMCTPALLOCTAG2:
case SIOCMCTPALLOCTAG:
- return mctp_ioctl_alloctag(msk, arg);
+ tagv2 = cmd == SIOCMCTPALLOCTAG2;
+ return mctp_ioctl_alloctag(msk, tagv2, arg);
case SIOCMCTPDROPTAG:
- return mctp_ioctl_droptag(msk, arg);
+ case SIOCMCTPDROPTAG2:
+ tagv2 = cmd == SIOCMCTPDROPTAG2;
+ return mctp_ioctl_droptag(msk, tagv2, arg);
}
return -EINVAL;
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 01c530dbc1..eefd7834d9 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -73,13 +73,50 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
return NULL;
}
-static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
- mctp_eid_t peer, u8 tag)
+/* A note on the key allocations.
+ *
+ * struct net->mctp.keys contains our set of currently-allocated keys for
+ * MCTP tag management. The lookup tuple for these is the peer EID,
+ * local EID and MCTP tag.
+ *
+ * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
+ * broadcast message is sent, we may receive responses from any peer EID.
+ * Because the broadcast dest address is equivalent to ANY, we create
+ * a key with (local = local-eid, peer = ANY). This allows a match on the
+ * incoming broadcast responses from any peer.
+ *
+ * We perform lookups when packets are received, and when tags are allocated
+ * in two scenarios:
+ *
+ * - when a packet is sent, with a locally-owned tag: we need to find an
+ * unused tag value for the (local, peer) EID pair.
+ *
+ * - when a tag is manually allocated: we need to find an unused tag value
+ * for the peer EID, but don't have a specific local EID at that stage.
+ *
+ * in the latter case, on successful allocation, we end up with a tag with
+ * (local = ANY, peer = peer-eid).
+ *
+ * So, the key set allows both a local EID of ANY, as well as a peer EID of
+ * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
+ * The matching (in mctp_key_match()) during lookup allows the match value to
+ * be ANY in either the dest or source addresses.
+ *
+ * When allocating (+ inserting) a tag, we need to check for conflicts amongst
+ * the existing tag set. This requires matching either exactly on the local
+ * and peer addresses, or either being ANY.
+ */
+
+static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
+ mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
+ if (key->net != net)
+ return false;
+
if (!mctp_address_matches(key->local_addr, local))
return false;
- if (key->peer_addr != peer)
+ if (!mctp_address_matches(key->peer_addr, peer))
return false;
if (key->tag != tag)
@@ -92,7 +129,7 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
* key exists.
*/
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
- mctp_eid_t peer,
+ unsigned int netid, mctp_eid_t peer,
unsigned long *irqflags)
__acquires(&key->lock)
{
@@ -108,7 +145,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry(key, &net->mctp.keys, hlist) {
- if (!mctp_key_match(key, mh->dest, peer, tag))
+ if (!mctp_key_match(key, netid, mh->dest, peer, tag))
continue;
spin_lock(&key->lock);
@@ -131,6 +168,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
}
static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ unsigned int net,
mctp_eid_t local, mctp_eid_t peer,
u8 tag, gfp_t gfp)
{
@@ -140,6 +178,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
if (!key)
return NULL;
+ key->net = net;
key->peer_addr = peer;
key->local_addr = local;
key->tag = tag;
@@ -185,8 +224,8 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
}
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
- if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
- key->tag)) {
+ if (mctp_key_match(tmp, key->net, key->local_addr,
+ key->peer_addr, key->tag)) {
spin_lock(&tmp->lock);
if (tmp->valid)
rc = -EEXIST;
@@ -327,6 +366,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct mctp_sock *msk;
struct mctp_hdr *mh;
+ unsigned int netid;
unsigned long f;
u8 tag, flags;
int rc;
@@ -345,6 +385,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* grab header, advance data ptr */
mh = mctp_hdr(skb);
+ netid = mctp_cb(skb)->net;
skb_pull(skb, sizeof(struct mctp_hdr));
if (mh->ver != 1)
@@ -358,7 +399,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* lookup socket / reasm context, exactly matching (src,dest,tag).
* we hold a ref on the key, and key->lock held.
*/
- key = mctp_lookup_key(net, skb, mh->src, &f);
+ key = mctp_lookup_key(net, skb, netid, mh->src, &f);
if (flags & MCTP_HDR_FLAG_SOM) {
if (key) {
@@ -368,8 +409,12 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key lookup to find the socket, but don't use this
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
+ *
+ * this lookup requires key->peer to be MCTP_ADDR_ANY,
+ * it doesn't match just any key->peer.
*/
- any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
+ any_key = mctp_lookup_key(net, skb, netid,
+ MCTP_ADDR_ANY, &f);
if (any_key) {
msk = container_of(any_key->sk,
struct mctp_sock, sk);
@@ -406,7 +451,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* packets for this message
*/
if (!key) {
- key = mctp_key_alloc(msk, mh->dest, mh->src,
+ key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
tag, GFP_ATOMIC);
if (!key) {
rc = -ENOMEM;
@@ -596,11 +641,12 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
refcount_inc(&key->refs);
}
-/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+/* Allocate a locally-owned tag value for (local, peer), and reserve
* it for the socket msk
*/
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t daddr, mctp_eid_t saddr,
+ unsigned int netid,
+ mctp_eid_t local, mctp_eid_t peer,
bool manual, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
@@ -610,11 +656,11 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
u8 tagbits;
/* for NULL destination EIDs, we may get a response from any peer */
- if (daddr == MCTP_ADDR_NULL)
- daddr = MCTP_ADDR_ANY;
+ if (peer == MCTP_ADDR_NULL)
+ peer = MCTP_ADDR_ANY;
/* be optimistic, alloc now */
- key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+ key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
if (!key)
return ERR_PTR(-ENOMEM);
@@ -631,12 +677,24 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
* lock held, they don't change over the lifetime of the key.
*/
+ /* tags are net-specific */
+ if (tmp->net != netid)
+ continue;
+
/* if we don't own the tag, it can't conflict */
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
- mctp_address_matches(tmp->local_addr, saddr)))
+ /* Since we're avoiding conflicting entries, match peer and
+ * local addresses, including with a wildcard on ANY. See
+ * 'A note on the key allocations' for background.
+ */
+ if (peer != MCTP_ADDR_ANY &&
+ !mctp_address_matches(tmp->peer_addr, peer))
+ continue;
+
+ if (local != MCTP_ADDR_ANY &&
+ !mctp_address_matches(tmp->local_addr, local))
continue;
spin_lock(&tmp->lock);
@@ -671,6 +729,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
}
static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
+ unsigned int netid,
mctp_eid_t daddr,
u8 req_tag, u8 *tagp)
{
@@ -685,6 +744,9 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
spin_lock_irqsave(&mns->keys_lock, flags);
hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ if (tmp->net != netid)
+ continue;
+
if (tmp->tag != req_tag)
continue;
@@ -868,6 +930,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct mctp_sk_key *key;
struct mctp_hdr *hdr;
unsigned long flags;
+ unsigned int netid;
unsigned int mtu;
mctp_eid_t saddr;
bool ext_rt;
@@ -919,16 +982,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = 0;
}
spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+ netid = READ_ONCE(rt->dev->net);
if (rc)
goto out_release;
if (req_tag & MCTP_TAG_OWNER) {
if (req_tag & MCTP_TAG_PREALLOC)
- key = mctp_lookup_prealloc_tag(msk, daddr,
+ key = mctp_lookup_prealloc_tag(msk, netid, daddr,
req_tag, &tag);
else
- key = mctp_alloc_local_tag(msk, daddr, saddr,
+ key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
false, &tag);
if (IS_ERR(key)) {
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 92ea4158f7..77e5dd4222 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -79,6 +79,16 @@ static void mctp_test_route_destroy(struct kunit *test,
kfree_rcu(&rt->rt, rcu);
}
+static void mctp_test_skb_set_dev(struct sk_buff *skb,
+ struct mctp_test_dev *dev)
+{
+ struct mctp_skb_cb *cb;
+
+ cb = mctp_cb(skb);
+ cb->net = READ_ONCE(dev->mdev->net);
+ skb->dev = dev->ndev;
+}
+
static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
unsigned int data_len)
{
@@ -91,6 +101,7 @@ static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
if (!skb)
return NULL;
+ __mctp_cb(skb);
memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
buf = skb_put(skb, data_len);
@@ -111,6 +122,7 @@ static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
if (!skb)
return NULL;
+ __mctp_cb(skb);
memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
memcpy(skb_put(skb, data_len), data, data_len);
@@ -249,8 +261,6 @@ static void mctp_test_rx_input(struct kunit *test)
skb = mctp_test_create_skb(&params->hdr, 1);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- __mctp_cb(skb);
-
mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL);
KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input);
@@ -283,7 +293,8 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests,
static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_dev **devp,
struct mctp_test_route **rtp,
- struct socket **sockp)
+ struct socket **sockp,
+ unsigned int netid)
{
struct sockaddr_mctp addr = {0};
struct mctp_test_route *rt;
@@ -293,6 +304,8 @@ static void __mctp_route_test_init(struct kunit *test,
dev = mctp_test_create_dev();
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ if (netid != MCTP_NET_ANY)
+ WRITE_ONCE(dev->mdev->net, netid);
rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
@@ -301,7 +314,7 @@ static void __mctp_route_test_init(struct kunit *test,
KUNIT_ASSERT_EQ(test, rc, 0);
addr.smctp_family = AF_MCTP;
- addr.smctp_network = MCTP_NET_ANY;
+ addr.smctp_network = netid;
addr.smctp_addr.s_addr = 8;
addr.smctp_type = 0;
rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr));
@@ -339,13 +352,12 @@ static void mctp_test_route_input_sk(struct kunit *test)
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock);
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
skb = mctp_test_create_skb_data(&params->hdr, &params->type);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
@@ -410,15 +422,14 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock);
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
for (i = 0; i < params->n_hdrs; i++) {
c = i;
skb = mctp_test_create_skb_data(&params->hdrs[i], &c);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
}
@@ -544,6 +555,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
struct mctp_sock *msk;
struct socket *sock;
unsigned long flags;
+ unsigned int net;
int rc;
u8 c;
@@ -551,6 +563,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
dev = mctp_test_create_dev();
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ net = READ_ONCE(dev->mdev->net);
rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
@@ -562,8 +575,9 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
mns = &sock_net(sock->sk)->mctp;
/* set the incoming tag according to test params */
- key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr,
- params->key_tag, GFP_KERNEL);
+ key = mctp_key_alloc(msk, net, params->key_local_addr,
+ params->key_peer_addr, params->key_tag,
+ GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key);
@@ -576,8 +590,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
skb = mctp_test_create_skb_data(&params->hdr, &c);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
- skb->dev = dev->ndev;
- __mctp_cb(skb);
+ mctp_test_skb_set_dev(skb, dev);
rc = mctp_route_input(&rt->rt, skb);
@@ -665,6 +678,373 @@ static void mctp_route_input_sk_keys_to_desc(
KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
mctp_route_input_sk_keys_to_desc);
+struct test_net {
+ unsigned int netid;
+ struct mctp_test_dev *dev;
+ struct mctp_test_route *rt;
+ struct socket *sock;
+ struct sk_buff *skb;
+ struct mctp_sk_key *key;
+ struct {
+ u8 type;
+ unsigned int data;
+ } msg;
+};
+
+static void
+mctp_test_route_input_multiple_nets_bind_init(struct kunit *test,
+ struct test_net *t)
+{
+ struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO);
+
+ t->msg.data = t->netid;
+
+ __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+
+ t->skb = mctp_test_create_skb_data(&hdr, &t->msg);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb);
+ mctp_test_skb_set_dev(t->skb, t->dev);
+}
+
+static void
+mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test,
+ struct test_net *t)
+{
+ __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+}
+
+/* Test that skbs from different nets (otherwise identical) get routed to their
+ * corresponding socket via the sockets' bind()
+ */
+static void mctp_test_route_input_multiple_nets_bind(struct kunit *test)
+{
+ struct sk_buff *rx_skb1, *rx_skb2;
+ struct test_net t1, t2;
+ int rc;
+
+ t1.netid = 1;
+ t2.netid = 2;
+
+ t1.msg.type = 0;
+ t2.msg.type = 0;
+
+ mctp_test_route_input_multiple_nets_bind_init(test, &t1);
+ mctp_test_route_input_multiple_nets_bind_init(test, &t2);
+
+ rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+ rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1);
+ KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)),
+ t1.netid);
+ kfree_skb(rx_skb1);
+
+ rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2);
+ KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)),
+ t2.netid);
+ kfree_skb(rx_skb2);
+
+ mctp_test_route_input_multiple_nets_bind_fini(test, &t1);
+ mctp_test_route_input_multiple_nets_bind_fini(test, &t2);
+}
+
+static void
+mctp_test_route_input_multiple_nets_key_init(struct kunit *test,
+ struct test_net *t)
+{
+ struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1));
+ struct mctp_sock *msk;
+ struct netns_mctp *mns;
+ unsigned long flags;
+
+ t->msg.data = t->netid;
+
+ __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+
+ msk = container_of(t->sock->sk, struct mctp_sock, sk);
+
+ t->key = mctp_key_alloc(msk, t->netid, hdr.dest, hdr.src, 1, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key);
+
+ mns = &sock_net(t->sock->sk)->mctp;
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ mctp_reserve_tag(&init_net, t->key, msk);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key);
+ t->skb = mctp_test_create_skb_data(&hdr, &t->msg);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb);
+ mctp_test_skb_set_dev(t->skb, t->dev);
+}
+
+static void
+mctp_test_route_input_multiple_nets_key_fini(struct kunit *test,
+ struct test_net *t)
+{
+ mctp_key_unref(t->key);
+ __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+}
+
+/* test that skbs from different nets (otherwise identical) get routed to their
+ * corresponding socket via the sk_key
+ */
+static void mctp_test_route_input_multiple_nets_key(struct kunit *test)
+{
+ struct sk_buff *rx_skb1, *rx_skb2;
+ struct test_net t1, t2;
+ int rc;
+
+ t1.netid = 1;
+ t2.netid = 2;
+
+ /* use type 1 which is not bound */
+ t1.msg.type = 1;
+ t2.msg.type = 1;
+
+ mctp_test_route_input_multiple_nets_key_init(test, &t1);
+ mctp_test_route_input_multiple_nets_key_init(test, &t2);
+
+ rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+ rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1);
+ KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)),
+ t1.netid);
+ kfree_skb(rx_skb1);
+
+ rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2);
+ KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg));
+ KUNIT_EXPECT_EQ(test,
+ *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)),
+ t2.netid);
+ kfree_skb(rx_skb2);
+
+ mctp_test_route_input_multiple_nets_key_fini(test, &t1);
+ mctp_test_route_input_multiple_nets_key_fini(test, &t2);
+}
+
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+
+static void mctp_test_flow_init(struct kunit *test,
+ struct mctp_test_dev **devp,
+ struct mctp_test_route **rtp,
+ struct socket **sock,
+ struct sk_buff **skbp,
+ unsigned int len)
+{
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+
+ /* we have a slightly odd routing setup here; the test route
+ * is for EID 8, which is our local EID. We don't do a routing
+ * lookup, so that's fine - all we require is a path through
+ * mctp_local_output, which will call rt->output on whatever
+ * route we provide
+ */
+ __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY);
+
+ /* Assign a single EID. ->addrs is freed on mctp netdev release */
+ dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
+ dev->mdev->num_addrs = 1;
+ dev->mdev->addrs[0] = 8;
+
+ skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL);
+ KUNIT_ASSERT_TRUE(test, skb);
+ __mctp_cb(skb);
+ skb_reserve(skb, sizeof(struct mctp_hdr) + 1);
+ memset(skb_put(skb, len), 0, len);
+
+ /* take a ref for the route, we'll decrement in local output */
+ refcount_inc(&rt->rt.refs);
+
+ *devp = dev;
+ *rtp = rt;
+ *skbp = skb;
+}
+
+static void mctp_test_flow_fini(struct kunit *test,
+ struct mctp_test_dev *dev,
+ struct mctp_test_route *rt,
+ struct socket *sock)
+{
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+/* test that an outgoing skb has the correct MCTP extension data set */
+static void mctp_test_packet_flow(struct kunit *test)
+{
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct mctp_flow *flow;
+ struct socket *sock;
+ u8 dst = 8;
+ int n, rc;
+
+ /* 30 bytes of payload: expected to leave as a single packet (see the
+ * qlen assertion below)
+ */
+ mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30);
+
+ rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* the transmitted packet is expected on the test route's pkts queue */
+ n = rt->pkts.qlen;
+ KUNIT_ASSERT_EQ(test, n, 1);
+
+ skb2 = skb_dequeue(&rt->pkts);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2);
+
+ /* the flow extension must be present and its key must belong to the
+ * sending socket
+ */
+ flow = skb_ext_find(skb2, SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow->key);
+ KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk);
+
+ kfree_skb(skb2);
+ mctp_test_flow_fini(test, dev, rt, sock);
+}
+
+/* test that outgoing skbs, after fragmentation, all have the correct MCTP
+ * extension data set.
+ */
+static void mctp_test_fragment_flow(struct kunit *test)
+{
+ struct mctp_flow *flows[2];
+ struct sk_buff *tx_skbs[2];
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+ struct socket *sock;
+ u8 dst = 8;
+ int n, rc;
+
+ /* 100 bytes of payload: expected to be split into exactly two packets
+ * (asserted below). NOTE(review): the route MTU that makes this so is
+ * set outside this function - confirm in __mctp_route_test_init().
+ */
+ mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100);
+
+ rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ n = rt->pkts.qlen;
+ KUNIT_ASSERT_EQ(test, n, 2);
+
+ /* both resulting packets should have the same flow data */
+ tx_skbs[0] = skb_dequeue(&rt->pkts);
+ tx_skbs[1] = skb_dequeue(&rt->pkts);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]);
+
+ /* first fragment: flow present, key owned by the sending socket */
+ flows[0] = skb_ext_find(tx_skbs[0], SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]->key);
+ KUNIT_ASSERT_PTR_EQ(test, flows[0]->key->sk, sock->sk);
+
+ /* second fragment: must reference the very same key object */
+ flows[1] = skb_ext_find(tx_skbs[1], SKB_EXT_MCTP);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[1]);
+ KUNIT_ASSERT_PTR_EQ(test, flows[1]->key, flows[0]->key);
+
+ kfree_skb(tx_skbs[0]);
+ kfree_skb(tx_skbs[1]);
+ mctp_test_flow_fini(test, dev, rt, sock);
+}
+
+#else
+/* Stand-in used when CONFIG_MCTP_FLOWS is disabled: reports the test as
+ * skipped instead of running it.
+ */
+static void mctp_test_packet_flow(struct kunit *test)
+{
+ kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y");
+}
+
+/* Stand-in used when CONFIG_MCTP_FLOWS is disabled: reports the test as
+ * skipped instead of running it.
+ */
+static void mctp_test_fragment_flow(struct kunit *test)
+{
+ kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y");
+}
+#endif
+
+/* Test that outgoing skbs cause a suitable tag to be created
+ *
+ * Sends one small message from a fresh socket over a test route on net 50
+ * and checks that exactly one key then exists in the netns key list, with
+ * the expected net, local/peer addresses and tag-owner bit.
+ */
+static void mctp_test_route_output_key_create(struct kunit *test)
+{
+	const unsigned int netid = 50;
+	const u8 dst = 26, src = 15;
+	struct mctp_test_route *rt;
+	struct mctp_test_dev *dev;
+	struct mctp_sk_key *key;
+	struct netns_mctp *mns;
+	unsigned long flags;
+	struct socket *sock;
+	struct sk_buff *skb;
+	bool empty, single;
+	const int len = 2;
+	int rc;
+
+	dev = mctp_test_create_dev();
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+	WRITE_ONCE(dev->mdev->net, netid);
+
+	rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+	rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+	KUNIT_ASSERT_EQ(test, rc, 0);
+
+	dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
+	/* don't write through a failed allocation */
+	KUNIT_ASSERT_NOT_NULL(test, dev->mdev->addrs);
+	dev->mdev->num_addrs = 1;
+	dev->mdev->addrs[0] = src;
+
+	skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL);
+	KUNIT_ASSERT_TRUE(test, skb);
+	__mctp_cb(skb);
+	/* Reserve headroom only. Reserving the full allocation size would
+	 * leave no guaranteed tailroom for the len bytes put below.
+	 */
+	skb_reserve(skb, sizeof(struct mctp_hdr) + 1);
+	memset(skb_put(skb, len), 0, len);
+
+	/* extra route ref for the output path, as in the flow tests */
+	refcount_inc(&rt->rt.refs);
+
+	mns = &sock_net(sock->sk)->mctp;
+
+	/* We assume we're starting from an empty keys list, which requires
+	 * preceding tests to clean up correctly!
+	 */
+	spin_lock_irqsave(&mns->keys_lock, flags);
+	empty = hlist_empty(&mns->keys);
+	spin_unlock_irqrestore(&mns->keys_lock, flags);
+	KUNIT_ASSERT_TRUE(test, empty);
+
+	rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+	KUNIT_ASSERT_EQ(test, rc, 0);
+
+	/* sample the list under the lock; assert only after dropping it */
+	key = NULL;
+	single = false;
+	spin_lock_irqsave(&mns->keys_lock, flags);
+	if (!hlist_empty(&mns->keys)) {
+		key = hlist_entry(mns->keys.first, struct mctp_sk_key, hlist);
+		single = hlist_is_singular_node(&key->hlist, &mns->keys);
+	}
+	spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+	KUNIT_ASSERT_NOT_NULL(test, key);
+	KUNIT_ASSERT_TRUE(test, single);
+
+	KUNIT_EXPECT_EQ(test, key->net, netid);
+	KUNIT_EXPECT_EQ(test, key->local_addr, src);
+	KUNIT_EXPECT_EQ(test, key->peer_addr, dst);
+	/* key has incoming tag, so inverse of what we sent */
+	KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER);
+
+	sock_release(sock);
+	mctp_test_route_destroy(test, rt);
+	mctp_test_destroy_dev(dev);
+}
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
@@ -673,6 +1053,11 @@ static struct kunit_case mctp_test_cases[] = {
mctp_route_input_sk_reasm_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
mctp_route_input_sk_keys_gen_params),
+ KUNIT_CASE(mctp_test_route_input_multiple_nets_bind),
+ KUNIT_CASE(mctp_test_route_input_multiple_nets_key),
+ KUNIT_CASE(mctp_test_packet_flow),
+ KUNIT_CASE(mctp_test_fragment_flow),
+ KUNIT_CASE(mctp_test_route_output_key_create),
{}
};
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index e03ba66bbe..565763eb02 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -4,6 +4,7 @@
#include <linux/mctp.h>
#include <linux/if_arp.h>
+#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/pkt_sched.h>
@@ -54,6 +55,7 @@ struct mctp_test_dev *mctp_test_create_dev(void)
rcu_read_lock();
dev->mdev = __mctp_dev_get(ndev);
+ dev->mdev->net = mctp_default_net(dev_net(ndev));
rcu_read_unlock();
return dev;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1af29af653..2dc7a908a6 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -594,7 +594,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net,
struct in_addr daddr;
memcpy(&daddr, addr, sizeof(struct in_addr));
- rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
+ rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return ERR_CAST(rt);
@@ -1154,7 +1154,7 @@ static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
if ((all || type == NETCONFA_INPUT) &&
nla_put_s32(skb, NETCONFA_INPUT,
- mdev->input_enabled) < 0)
+ READ_ONCE(mdev->input_enabled)) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -1303,11 +1303,12 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct mpls_dev *mdev;
- int idx, s_idx;
- int h, s_h;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -1324,40 +1325,23 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = net->dev_base_seq;
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- mdev = mpls_dev_get(dev);
- if (!mdev)
- goto cont;
- if (mpls_netconf_fill_devconf(skb, mdev,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ mdev = mpls_dev_get(dev);
+ if (!mdev)
+ continue;
+ err = mpls_netconf_fill_devconf(skb, mdev,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ break;
}
-done:
- cb->args[0] = h;
- cb->args[1] = idx;
+ rcu_read_unlock();
- return skb->len;
+ return err;
}
#define MPLS_PERDEV_SYSCTL_OFFSET(field) \
@@ -1393,13 +1377,13 @@ static const struct ctl_table mpls_dev_table[] = {
.proc_handler = mpls_conf_proc,
.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
},
- { }
};
static int mpls_dev_sysctl_register(struct net_device *dev,
struct mpls_dev *mdev)
{
char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
+ size_t table_size = ARRAY_SIZE(mpls_dev_table);
struct net *net = dev_net(dev);
struct ctl_table *table;
int i;
@@ -1411,7 +1395,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data = (char *)mdev + (uintptr_t)table[i].data;
table[i].extra1 = mdev;
table[i].extra2 = net;
@@ -1419,8 +1403,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(mpls_dev_table));
+ mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
if (!mdev->sysctl)
goto free;
@@ -1438,7 +1421,7 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct mpls_dev *mdev)
{
struct net *net = dev_net(dev);
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!mdev->sysctl)
return;
@@ -2179,7 +2162,9 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
unsigned int flags = NLM_F_MULTI;
size_t platform_labels;
unsigned int index;
@@ -2667,11 +2652,11 @@ static const struct ctl_table mpls_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &ttl_max,
},
- { }
};
static int mpls_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(mpls_table);
struct ctl_table *table;
int i;
@@ -2687,11 +2672,11 @@ static int mpls_net_init(struct net *net)
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data = (char *)net + (uintptr_t)table[i].data;
net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
- ARRAY_SIZE(mpls_table));
+ table_size);
if (net->mpls.ctl == NULL) {
kfree(table);
return -ENOMEM;
@@ -2704,7 +2689,7 @@ static void mpls_net_exit(struct net *net)
{
struct mpls_route __rcu **platform_label;
size_t platform_labels;
- struct ctl_table *table;
+ const struct ctl_table *table;
unsigned int index;
table = net->mpls.ctl->ctl_table_arg;
@@ -2771,7 +2756,8 @@ static int __init mpls_init(void)
mpls_getroute, mpls_dump_routes, 0);
rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, 0);
+ mpls_netconf_dump_devconf,
+ RTNL_FLAG_DUMP_UNLOCKED);
err = ipgre_tunnel_encap_add_mpls_ops();
if (err)
pr_err("Can't add mpls over gre tunnel ops\n");
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 45d1e6a157..34ab659f54 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -109,5 +109,5 @@ module_init(mpls_gso_init);
module_exit(mpls_gso_exit);
MODULE_DESCRIPTION("MPLS GSO support");
-MODULE_AUTHOR("Simon Horman (horms@verge.net.au)");
+MODULE_AUTHOR("Simon Horman <horms@verge.net.au>");
MODULE_LICENSE("GPL");
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index ef59e25dc4..4385fd3b13 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -55,8 +55,6 @@ static int mpls_xmit(struct sk_buff *skb)
out_dev = dst->dev;
net = dev_net(out_dev);
- skb_orphan(skb);
-
if (!mpls_output_possible(out_dev) ||
!dst->lwtstate || skb_warn_if_lro(skb))
goto drop;
@@ -83,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
+ rt = dst_rtable(dst);
} else if (dst->ops->family == AF_INET6) {
if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
ttl = tun_encap_info->default_ttl;
@@ -92,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
} else {
goto drop;
}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 13fe0748dd..98b1dd498f 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -92,10 +92,65 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
- strcpy(pernet->scheduler, "default");
+ strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
}
#ifdef CONFIG_SYSCTL
+/* Store @name as the scheduler name held in @scheduler.
+ *
+ * @scheduler is a buffer of MPTCP_SCHED_NAME_MAX bytes. Returns 0 on
+ * success, or -ENOENT when mptcp_sched_find() does not know @name, in which
+ * case the buffer is left untouched. The lookup and the copy are done
+ * inside one RCU read-side section.
+ */
+static int mptcp_set_scheduler(char *scheduler, const char *name)
+{
+	struct mptcp_sched_ops *sched;
+	int ret = 0;
+
+	rcu_read_lock();
+	sched = mptcp_sched_find(name);
+	if (sched)
+		strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* proc handler for the "scheduler" sysctl.
+ *
+ * Reads and writes go through a stack copy so that a written name is only
+ * committed after mptcp_set_scheduler() has validated it.
+ *
+ * The per-netns buffer is taken from ctl->data, which
+ * mptcp_pernet_new_table() points at pernet->scheduler. It is deliberately
+ * not derived from current->nsproxy: that pointer can be NULL for an
+ * exiting task, and it names the caller's netns rather than the one this
+ * sysctl entry belongs to.
+ */
+static int proc_scheduler(struct ctl_table *ctl, int write,
+			  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
+	char val[MPTCP_SCHED_NAME_MAX];
+	struct ctl_table tbl = {
+		.data = val,
+		.maxlen = MPTCP_SCHED_NAME_MAX,
+	};
+	int ret;
+
+	strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
+
+	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		ret = mptcp_set_scheduler(*scheduler, val);
+
+	return ret;
+}
+
+/* proc handler for the "available_schedulers" sysctl.
+ *
+ * Allocates a temporary buffer of MPTCP_SCHED_BUF_MAX bytes, has
+ * mptcp_get_available_schedulers() fill it, and hands it to proc_dostring().
+ * Returns -ENOMEM if the buffer cannot be allocated, otherwise the
+ * proc_dostring() result.
+ *
+ * NOTE(review): @write is forwarded unchanged, so a write only lands in the
+ * temporary buffer, which is freed right after. The table entry is
+ * registered with mode 0644 although it is described as read-only info -
+ * consider 0444.
+ */
+static int proc_available_schedulers(struct ctl_table *ctl,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
+ int ret;
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+ if (!tbl.data)
+ return -ENOMEM;
+
+ mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ kfree(tbl.data);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -148,7 +203,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.procname = "scheduler",
.maxlen = MPTCP_SCHED_NAME_MAX,
.mode = 0644,
- .proc_handler = proc_dostring,
+ .proc_handler = proc_scheduler,
+ },
+ {
+ .procname = "available_schedulers",
+ .maxlen = MPTCP_SCHED_BUF_MAX,
+ .mode = 0644,
+ .proc_handler = proc_available_schedulers,
},
{
.procname = "close_timeout",
@@ -156,7 +217,6 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {}
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -178,7 +238,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
table[6].data = &pernet->scheduler;
- table[7].data = &pernet->close_timeout;
+ /* table[7] is for available_schedulers which is read-only info */
+ table[8].data = &pernet->close_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -198,7 +259,7 @@ err_alloc:
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
- struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
+ const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
unregister_net_sysctl_table(pernet->ctl_table_hdr);
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 7017dd6065..3ae46b545d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -10,7 +10,6 @@
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index c30405e768..7884217f33 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
+ SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
+ SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index dd7fd1f246..66aa67f49d 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <net/inet_common.h>
+
enum linux_mptcp_mib_field {
MPTCP_MIB_NUM = 0,
MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
@@ -12,7 +14,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 5409c2ea3f..0566dd7938 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -10,7 +10,6 @@
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
@@ -225,6 +224,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
}
static const struct inet_diag_handler mptcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = mptcp_diag_dump,
.dump_one = mptcp_diag_dump_one,
.idiag_get_info = mptcp_diag_get_info,
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index 670da7822e..c30a2a90a1 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -32,8 +32,9 @@ const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]
};
/* MPTCP_PM_CMD_GET_ADDR - do */
-const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
- [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
};
/* MPTCP_PM_CMD_FLUSH_ADDRS - do */
@@ -110,7 +111,7 @@ const struct genl_ops mptcp_pm_nl_ops[11] = {
.doit = mptcp_pm_nl_get_addr_doit,
.dumpit = mptcp_pm_nl_get_addr_dumpit,
.policy = mptcp_pm_get_addr_nl_policy,
- .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .maxattr = MPTCP_PM_ATTR_TOKEN,
.flags = GENL_UNS_ADMIN_PERM,
},
{
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
index ac9fc7225b..e24258f6f8 100644
--- a/net/mptcp/mptcp_pm_gen.h
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -18,7 +18,7 @@ extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADD
extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
-extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1];
extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 63fc0758c2..ac2f1a54cc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -689,8 +689,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
- opts->ahmac = add_addr_generate_hmac(msk->local_key,
- msk->remote_key,
+ opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
@@ -792,7 +792,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
*size = TCPOLEN_MPTCP_FASTCLOSE;
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
- opts->rcvr_key = msk->remote_key;
+ opts->rcvr_key = READ_ONCE(msk->remote_key);
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
@@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return true;
} else if (subflow_req->mp_join) {
opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
- opts->backup = subflow_req->backup;
+ opts->backup = subflow_req->request_bkup;
opts->join_id = subflow_req->local_id;
opts->thmac = subflow_req->thmac;
opts->nonce = subflow_req->local_nonce;
@@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->remote_key_valid &&
(((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
- ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) &&
+ (!mp_opt->echo || subflow->mp_join)))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
@@ -1031,7 +1032,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
- msk->snd_una = new_snd_una;
+ WRITE_ONCE(msk->snd_una, new_snd_una);
}
static void ack_update_msk(struct mptcp_sock *msk,
@@ -1058,21 +1059,22 @@ static void ack_update_msk(struct mptcp_sock *msk,
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
if (after64(new_wnd_end, msk->wnd_end))
- msk->wnd_end = new_wnd_end;
+ WRITE_ONCE(msk->wnd_end, new_wnd_end);
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+ if (after64(msk->wnd_end, snd_nxt))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
__mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
+ msk->last_ack_recv = tcp_jiffies32;
mptcp_data_unlock(sk);
trace_ack_update_msk(mp_opt->data_ack,
old_snd_una, new_snd_una,
- new_wnd_end, msk->wnd_end);
+ new_wnd_end, READ_ONCE(msk->wnd_end));
}
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -1100,8 +1102,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
if (mp_opt->echo)
return true;
- hmac = add_addr_generate_hmac(msk->remote_key,
- msk->local_key,
+ hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
@@ -1148,7 +1150,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
- msk->local_key == mp_opt.rcvr_key) {
+ READ_ONCE(msk->local_key) == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 4ae19113b8..23bb89c94e 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -6,7 +6,6 @@
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
-#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
@@ -77,7 +76,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
+ pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
@@ -427,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, &skc_local);
}
+/* Report whether the local address of @skc is flagged as backup.
+ *
+ * Converts @skc to a struct mptcp_addr_info via mptcp_local_address() and
+ * dispatches to the userspace or in-kernel (netlink) path manager lookup,
+ * depending on mptcp_pm_is_userspace(@msk).
+ */
+bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
+{
+ struct mptcp_addr_info skc_local;
+
+ mptcp_local_address((struct sock_common *)skc, &skc_local);
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_is_backup(msk, &skc_local);
+
+ return mptcp_pm_nl_is_backup(msk, &skc_local);
+}
+
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
@@ -441,13 +452,27 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id
return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
}
-int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup)
+/* Dispatch a get-address request: when the request carries
+ * MPTCP_PM_ATTR_TOKEN it is handled by the userspace PM code, otherwise by
+ * the in-kernel netlink PM. Returns the chosen handler's result.
+ */
+int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
+{
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_get_addr(skb, info);
+ return mptcp_pm_nl_get_addr(skb, info);
+}
+
+/* Dispatch an address dump: the request info is recovered from @cb with
+ * genl_info_dump(); a MPTCP_PM_ATTR_TOKEN attribute selects the userspace PM
+ * dump, otherwise the in-kernel netlink PM dump is used.
+ */
+int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_dump_addr(msg, cb);
+ return mptcp_pm_nl_dump_addr(msg, cb);
+}
+
+/* Dispatch a set-flags request: when the request carries
+ * MPTCP_PM_ATTR_TOKEN it is handled by the userspace PM code, otherwise by
+ * the in-kernel netlink PM. Attribute parsing is left to the handlers.
+ */
+int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
{
- if (token)
- return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup);
- return mptcp_pm_nl_set_flags(net, loc, bkup);
+ if (info->attrs[MPTCP_PM_ATTR_TOKEN])
+ return mptcp_userspace_pm_set_flags(skb, info);
+ return mptcp_pm_nl_set_flags(skb, info);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 58d17d9604..4cae2aa7be 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -8,18 +8,13 @@
#include <linux/inet.h>
#include <linux/kernel.h>
-#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
-#include <net/genetlink.h>
-#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
-
-/* forward declaration */
-static struct genl_family mptcp_genl_family;
+#include "mptcp_pm_gen.h"
static int pm_nl_pernet_id;
@@ -353,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
- if (mptcp_pm_is_kernel(msk))
+ if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk)))
return false;
sk_reset_timer(sk, &add_entry->add_timer,
@@ -476,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
subflow->send_mp_prio = 1;
- subflow->backup = backup;
subflow->request_bkup = backup;
}
@@ -505,15 +499,12 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
}
static struct mptcp_pm_addr_entry *
-__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
- bool lookup_by_id)
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id &&
- mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) ||
- (lookup_by_id && entry->addr.id == info->id))
+ if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
return entry;
}
return NULL;
@@ -521,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
+ struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
struct sock *sk = (struct sock *)msk;
- struct mptcp_pm_addr_entry *local;
unsigned int add_addr_signal_max;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
@@ -543,7 +534,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
rcu_read_lock();
- entry = __lookup_addr(pernet, &mpc_addr, false);
+ entry = __lookup_addr(pernet, &mpc_addr);
if (entry) {
__clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
msk->mpc_endpoint_id = entry->addr.id;
@@ -564,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
- local = select_signal_address(pernet, msk);
-
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
@@ -576,16 +565,26 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
return;
- if (local) {
- if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
- msk->pm.add_addr_signaled++;
- mptcp_pm_announce_addr(msk, &local->addr, false);
- mptcp_pm_nl_addr_send_ack(msk);
- }
- }
+ local = select_signal_address(pernet, msk);
+ if (!local)
+ goto subflow;
+
+ /* If the alloc fails, we are on memory pressure, not worth
+ * continuing, and trying to create subflows.
+ */
+ if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
+ return;
+
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ msk->pm.add_addr_signaled++;
+ mptcp_pm_announce_addr(msk, &local->addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+
+ if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
+ signal_and_subflow = local;
}
+subflow:
/* check if should create a new subflow */
while (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) {
@@ -593,9 +592,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
bool fullmesh;
int i, nr;
- local = select_local_address(pernet, msk);
- if (!local)
- break;
+ if (signal_and_subflow) {
+ local = signal_and_subflow;
+ signal_and_subflow = NULL;
+ } else {
+ local = select_local_address(pernet, msk);
+ if (!local)
+ break;
+ }
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
@@ -685,6 +689,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
unsigned int subflows_max;
+ bool sf_created = false;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
@@ -712,15 +717,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (nr == 0)
return;
- msk->pm.add_addr_accepted++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &addrs[i], &remote);
+ if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ sf_created = true;
spin_lock_bh(&msk->pm.lock);
+
+ if (sf_created) {
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
+ }
}
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
@@ -822,10 +830,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
removed = true;
- __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ if (rm_type == MPTCP_MIB_RMSUBFLOW)
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
}
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
+ else if (rm_type == MPTCP_MIB_RMADDR)
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
if (!removed)
continue;
@@ -1103,6 +1114,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
return ret;
}
+/* In-kernel PM lookup: is the endpoint matching @skc flagged as backup?
+ *
+ * Walks the per-netns local address list under rcu_read_lock() and stops at
+ * the first entry for which mptcp_addresses_equal() matches @skc (the
+ * entry's own port value is passed as the third argument). Returns true if
+ * that entry has MPTCP_PM_ADDR_FLAG_BACKUP set, false if it does not or if
+ * no entry matches.
+ */
+bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+ struct mptcp_pm_addr_entry *entry;
+ bool backup = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return backup;
+}
+
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
@@ -1312,8 +1341,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
- if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- GENL_SET_ERR_MSG(info, "flags must have signal when using port");
+ if (addr.addr.port && !address_use_port(&addr)) {
+ GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port");
return -EINVAL;
}
@@ -1402,6 +1431,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
ret = remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= ret;
mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
@@ -1535,39 +1565,49 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
+ int anno_nr = 0;
list_for_each_entry(entry, rm_list, list) {
- if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
- lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
- alist.nr < MPTCP_RM_IDS_MAX)
- alist.ids[alist.nr++] = entry->addr.id;
+ if (alist.nr >= MPTCP_RM_IDS_MAX)
+ break;
+
+ /* only delete if either announced or matching a subflow */
+ if (remove_anno_list_by_saddr(msk, &entry->addr))
+ anno_nr++;
+ else if (!lookup_subflow_by_saddr(&msk->conn_list,
+ &entry->addr))
+ continue;
+
+ alist.ids[alist.nr++] = entry->addr.id;
}
if (alist.nr) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= anno_nr;
mptcp_pm_remove_addr(msk, &alist);
spin_unlock_bh(&msk->pm.lock);
}
}
-void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
- if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- slist.nr < MPTCP_RM_IDS_MAX)
+ if (slist.nr < MPTCP_RM_IDS_MAX &&
+ lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
slist.ids[slist.nr++] = entry->addr.id;
- if (remove_anno_list_by_saddr(msk, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX)
+ if (alist.nr < MPTCP_RM_IDS_MAX &&
+ remove_anno_list_by_saddr(msk, &entry->addr))
alist.ids[alist.nr++] = entry->addr.id;
}
if (alist.nr) {
spin_lock_bh(&msk->pm.lock);
+ msk->pm.add_addr_signaled -= alist.nr;
mptcp_pm_remove_addr(msk, &alist);
spin_unlock_bh(&msk->pm.lock);
}
@@ -1636,8 +1676,8 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static int mptcp_nl_fill_addr(struct sk_buff *skb,
- struct mptcp_pm_addr_entry *entry)
+int mptcp_nl_fill_addr(struct sk_buff *skb,
+ struct mptcp_pm_addr_entry *entry)
{
struct mptcp_addr_info *addr = &entry->addr;
struct nlattr *attr;
@@ -1675,7 +1715,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
@@ -1725,8 +1765,13 @@ fail:
return ret;
}
-int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ return mptcp_pm_get_addr(skb, info);
+}
+
+int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb)
{
struct net *net = sock_net(msg->sk);
struct mptcp_pm_addr_entry *entry;
@@ -1768,6 +1813,12 @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
return msg->len;
}
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return mptcp_pm_dump_addr(msg, cb);
+}
+
static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
struct nlattr *attr = info->attrs[id];
@@ -1882,66 +1933,63 @@ next:
return ret;
}
-int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup)
+int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
{
- struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
+ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
MPTCP_PM_ADDR_FLAG_FULLMESH;
+ struct net *net = sock_net(skb->sk);
struct mptcp_pm_addr_entry *entry;
+ struct pm_nl_pernet *pernet;
u8 lookup_by_id = 0;
+ u8 bkup = 0;
+ int ret;
+
+ pernet = pm_nl_get_pernet(net);
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
+ if (ret < 0)
+ return ret;
- if (addr->addr.family == AF_UNSPEC) {
+ if (addr.addr.family == AF_UNSPEC) {
lookup_by_id = 1;
- if (!addr->addr.id)
+ if (!addr.addr.id) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
return -EOPNOTSUPP;
+ }
}
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
+
spin_lock_bh(&pernet->lock);
- entry = __lookup_addr(pernet, &addr->addr, lookup_by_id);
+ entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) :
+ __lookup_addr(pernet, &addr.addr);
if (!entry) {
spin_unlock_bh(&pernet->lock);
+ GENL_SET_ERR_MSG(info, "address not found");
return -EINVAL;
}
- if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
spin_unlock_bh(&pernet->lock);
+ GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
}
- changed = (addr->flags ^ entry->flags) & mask;
- entry->flags = (entry->flags & ~mask) | (addr->flags & mask);
- *addr = *entry;
+ changed = (addr.flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
+ addr = *entry;
spin_unlock_bh(&pernet->lock);
- mptcp_nl_set_flags(net, &addr->addr, bkup, changed);
+ mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
return 0;
}
int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
- struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
- struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
- struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct net *net = sock_net(skb->sk);
- u8 bkup = 0;
- int ret;
-
- ret = mptcp_pm_parse_entry(attr, info, false, &addr);
- if (ret < 0)
- return ret;
-
- if (attr_rem) {
- ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
- if (ret < 0)
- return ret;
- }
-
- if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
- bkup = 1;
-
- return mptcp_pm_set_flags(net, token, &addr, &remote, bkup);
+ return mptcp_pm_set_flags(skb, info);
}
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
@@ -2014,7 +2062,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
const struct mptcp_subflow_context *sf;
u8 sk_err;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
return -EMSGSIZE;
if (mptcp_event_add_subflow(skb, ssk))
@@ -2072,7 +2120,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
- int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
+ int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
if (err)
return err;
@@ -2100,7 +2148,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
@@ -2135,7 +2183,7 @@ void mptcp_event_addr_announced(const struct sock *ssk,
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
@@ -2251,7 +2299,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
goto nla_put_failure;
break;
case MPTCP_EVENT_CLOSED:
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_ANNOUNCED:
@@ -2281,7 +2329,7 @@ nla_put_failure:
nlmsg_free(skb);
}
-static struct genl_family mptcp_genl_family __ro_after_init = {
+struct genl_family mptcp_genl_family __ro_after_init = {
.name = MPTCP_PM_NAME,
.version = MPTCP_PM_VER,
.netnsok = true,
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index bc97cc30f0..8eaa9fbe3e 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -6,6 +6,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
@@ -106,19 +107,26 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
return -EINVAL;
}
+static struct mptcp_pm_addr_entry *
+mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (entry->addr.id == id)
+ return entry;
+ }
+ return NULL;
+}
+
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex)
{
- struct mptcp_pm_addr_entry *entry, *match = NULL;
+ struct mptcp_pm_addr_entry *match;
spin_lock_bh(&msk->pm.lock);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (id == entry->addr.id) {
- match = entry;
- break;
- }
- }
+ match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
spin_unlock_bh(&msk->pm.lock);
if (match) {
*flags = match->flags;
@@ -157,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
+bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry *entry;
+ bool backup = false;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, skc, false)) {
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+
+ return backup;
+}
+
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
@@ -261,7 +287,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
- struct mptcp_pm_addr_entry *match = NULL;
+ struct mptcp_pm_addr_entry *match;
struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
LIST_HEAD(free_list);
@@ -298,13 +324,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
- list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
- if (entry->addr.id == id_val) {
- match = entry;
- break;
- }
- }
-
+ match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
release_sock(sk);
@@ -334,7 +354,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry local = { 0 };
struct mptcp_addr_info addr_r;
- struct mptcp_addr_info addr_l;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -360,25 +379,31 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ err = mptcp_pm_parse_entry(laddr, info, true, &local);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
+ if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ GENL_SET_ERR_MSG(info, "invalid addr flags");
+ err = -EINVAL;
+ goto create_err;
+ }
+ local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- local.addr = addr_l;
err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
@@ -387,7 +412,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
- err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
+ err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
release_sock(sk);
@@ -540,35 +565,194 @@ destroy_err:
return err;
}
-int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup)
+int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
{
+ struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
+ struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct net *net = sock_net(skb->sk);
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
u32 token_val;
+ u8 bkup = 0;
token_val = nla_get_u32(token);
msk = mptcp_token_get_sock(net, token_val);
- if (!msk)
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
return ret;
+ }
sk = (struct sock *)msk;
- if (!mptcp_pm_is_userspace(msk))
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "userspace PM not selected");
+ goto set_flags_err;
+ }
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &loc);
+ if (ret < 0)
goto set_flags_err;
- if (loc->addr.family == AF_UNSPEC ||
- rem->addr.family == AF_UNSPEC)
+ if (attr_rem) {
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
+ if (ret < 0)
+ goto set_flags_err;
+ }
+
+ if (loc.addr.family == AF_UNSPEC ||
+ rem.addr.family == AF_UNSPEC) {
+ GENL_SET_ERR_MSG(info, "invalid address families");
+ ret = -EINVAL;
goto set_flags_err;
+ }
+
+ if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
lock_sock(sk);
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
release_sock(sk);
set_flags_err:
sock_put(sk);
return ret;
}
+
+int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct id_bitmap {
+ DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1);
+ } *bitmap;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(msg->sk);
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_sock *msk;
+ struct nlattr *token;
+ int ret = -EINVAL;
+ struct sock *sk;
+ void *hdr;
+
+ bitmap = (struct id_bitmap *)cb->ctx;
+ token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+
+ msk = mptcp_token_get_sock(net, nla_get_u32(token));
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return ret;
+ }
+
+ sk = (struct sock *)msk;
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto out;
+ }
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (test_bit(entry->addr.id, bitmap->map))
+ continue;
+
+ hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &mptcp_genl_family,
+ NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
+ if (!hdr)
+ break;
+
+ if (mptcp_nl_fill_addr(msg, entry) < 0) {
+ genlmsg_cancel(msg, hdr);
+ break;
+ }
+
+ __set_bit(entry->addr.id, bitmap->map);
+ genlmsg_end(msg, hdr);
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+ ret = msg->len;
+
+out:
+ sock_put(sk);
+ return ret;
+}
+
+int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+ struct mptcp_sock *msk;
+ struct sk_buff *msg;
+ int ret = -EINVAL;
+ struct sock *sk;
+ void *reply;
+
+ msk = mptcp_token_get_sock(net, nla_get_u32(token));
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return ret;
+ }
+
+ sk = (struct sock *)msk;
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto out;
+ }
+
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
+ if (ret < 0)
+ goto out;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
+ info->genlhdr->cmd);
+ if (!reply) {
+ GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
+ ret = -EMSGSIZE;
+ goto fail;
+ }
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id);
+ if (!entry) {
+ GENL_SET_ERR_MSG(info, "address not found");
+ ret = -EINVAL;
+ goto unlock_fail;
+ }
+
+ ret = mptcp_nl_fill_addr(msg, entry);
+ if (ret)
+ goto unlock_fail;
+
+ genlmsg_end(msg, reply);
+ ret = genlmsg_reply(msg, info);
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+ sock_put(sk);
+ return ret;
+
+unlock_fail:
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+fail:
+ nlmsg_free(msg);
+out:
+ sock_put(sk);
+ return ret;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2b921af271..ff8292d0cf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -15,12 +15,12 @@
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
-#include <net/tcp.h>
#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+#include <net/hotdata.h>
#include <net/xfrm.h>
#include <asm/ioctls.h>
#include "protocol.h"
@@ -350,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+ if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
goto drop;
+ }
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -410,6 +412,7 @@ static void mptcp_close_wake_up(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
+/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -441,16 +444,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
}
}
+/* can be called with no lock acquired */
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (READ_ONCE(msk->rcv_data_fin) &&
- ((1 << sk->sk_state) &
+ ((1 << inet_sk_state_load(sk)) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
- if (msk->ack_seq == rcv_data_fin_seq) {
+ if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
if (seq)
*seq = rcv_data_fin_seq;
@@ -705,6 +709,8 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
} while (more_data_avail);
+ if (moved > 0)
+ msk->last_data_recv = tcp_jiffies32;
*bytes += moved;
return done;
}
@@ -748,7 +754,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
- msk->ack_seq = end_seq;
+ WRITE_ONCE(msk->ack_seq, end_seq);
moved = true;
}
return moved;
@@ -840,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
- if (__mptcp_rmem(sk) > sk_rbuf) {
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
+ if (__mptcp_rmem(sk) > sk_rbuf)
return;
- }
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
@@ -985,6 +989,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}
+/* called under both the msk socket lock and the data lock */
static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1033,13 +1038,15 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (snd_una == READ_ONCE(msk->snd_nxt) &&
- snd_una == READ_ONCE(msk->write_seq)) {
+ if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_rtx_timer(sk);
} else {
mptcp_reset_rtx_timer(sk);
}
+
+ if (mptcp_pending_data_fin_ack(sk))
+ mptcp_schedule_work(sk);
}
static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1266,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1415,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
mptcp_for_each_subflow(msk, subflow) {
+ bool backup = subflow->backup || subflow->request_bkup;
+
trace_mptcp_subflow_get_send(subflow);
ssk = mptcp_subflow_tcp_sock(subflow);
if (!mptcp_subflow_active(subflow))
continue;
tout = max(tout, mptcp_timeout_from_subflow(subflow));
- nr_active += !subflow->backup;
+ nr_active += !backup;
pace = subflow->avg_pacing_rate;
if (unlikely(!pace)) {
/* init pacing rate from socket */
@@ -1432,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace);
- if (linger_time < send_info[subflow->backup].linger_time) {
- send_info[subflow->backup].ssk = ssk;
- send_info[subflow->backup].linger_time = linger_time;
+ if (linger_time < send_info[backup].linger_time) {
+ send_info[backup].ssk = ssk;
+ send_info[backup].linger_time = linger_time;
}
}
__mptcp_set_timeout(sk, tout);
@@ -1500,7 +1509,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
- msk->snd_nxt = snd_nxt_new;
+ WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
}
}
@@ -1552,6 +1561,8 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
err = copied;
out:
+ if (err > 0)
+ msk->last_data_sent = tcp_jiffies32;
return err;
}
@@ -1687,15 +1698,6 @@ out:
}
}
-static void mptcp_set_nospace(struct sock *sk)
-{
- /* enable autotune */
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-
- /* will be cleared on avail space */
- set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
-}
-
static int mptcp_disconnect(struct sock *sk, int flags);
static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
@@ -1766,6 +1768,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy,
return 0;
}
+/* open-code sk_stream_memory_free() plus sent limit computation to
+ * avoid indirect calls in fast-path.
+ * Called under the msk socket lock, so we can avoid a bunch of ONCE
+ * annotations.
+ */
+static u32 mptcp_send_limit(const struct sock *sk)
+{
+ const struct mptcp_sock *msk = mptcp_sk(sk);
+ u32 limit, not_sent;
+
+ if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf))
+ return 0;
+
+ limit = mptcp_notsent_lowat(sk);
+ if (limit == UINT_MAX)
+ return UINT_MAX;
+
+ not_sent = msk->write_seq - msk->snd_nxt;
+ if (not_sent >= limit)
+ return 0;
+
+ return limit - not_sent;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1810,6 +1836,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct mptcp_data_frag *dfrag;
bool dfrag_collapsed;
size_t psize, offset;
+ u32 copy_limit;
+
+ /* ensure fitting the notsent_lowat() constraint */
+ copy_limit = mptcp_send_limit(sk);
+ if (!copy_limit)
+ goto wait_for_memory;
/* reuse tail pfrag, if possible, or carve a new one from the
* page allocator
@@ -1817,9 +1849,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dfrag = mptcp_pending_tail(sk);
dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
if (!dfrag_collapsed) {
- if (!sk_stream_memory_free(sk))
- goto wait_for_memory;
-
if (!mptcp_page_frag_refill(sk, pfrag))
goto wait_for_memory;
@@ -1834,6 +1863,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
offset = dfrag->offset + dfrag->data_len;
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
+ psize = min_t(size_t, psize, copy_limit);
total_ts = psize + frag_truesize;
if (!sk_wmem_schedule(sk, total_ts))
@@ -1869,7 +1899,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
continue;
wait_for_memory:
- mptcp_set_nospace(sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
__mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
@@ -2033,7 +2063,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
- tcp_sk(ssk)->window_clamp = window_clamp;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
@@ -2115,7 +2145,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
skb = skb_peek(&msk->receive_queue);
if (skb) {
- u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
if (hint_val >= INT_MAX)
return INT_MAX;
@@ -2542,7 +2572,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
- tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(tcp_sk);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
unlock_sock_fast(tcp_sk, slow);
@@ -2759,7 +2789,7 @@ static void __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->rmem_fwd_alloc = 0;
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -2770,6 +2800,9 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
msk->subflow_id = 1;
+ msk->last_data_sent = tcp_jiffies32;
+ msk->last_data_recv = tcp_jiffies32;
+ msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
@@ -2783,7 +2816,8 @@ static void mptcp_ca_reset(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+ strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name,
+ sizeof(mptcp_sk(sk)->ca_name));
/* no need to keep a reference to the ops, the name will suffice */
tcp_cleanup_congestion_control(sk);
@@ -2884,9 +2918,14 @@ void mptcp_set_state(struct sock *sk, int state)
if (oldstate != TCP_ESTABLISHED)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
break;
-
+ case TCP_CLOSE_WAIT:
+ /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state:
+ * MPTCP "accepted" sockets will be created later on. So no
+ * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT.
+ */
+ break;
default:
- if (oldstate == TCP_ESTABLISHED)
+ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
}
@@ -2975,7 +3014,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->rmem_fwd_alloc);
+ WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -3150,16 +3189,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->recovery = false;
- msk->can_ack = false;
- msk->fully_established = false;
- msk->rcv_data_fin = false;
- msk->snd_data_fin_enable = false;
- msk->rcv_fastclose = false;
- msk->use_64bit_ack = false;
- msk->bytes_consumed = 0;
+ WRITE_ONCE(msk->can_ack, false);
+ WRITE_ONCE(msk->fully_established, false);
+ WRITE_ONCE(msk->rcv_data_fin, false);
+ WRITE_ONCE(msk->snd_data_fin_enable, false);
+ WRITE_ONCE(msk->rcv_fastclose, false);
+ WRITE_ONCE(msk->use_64bit_ack, false);
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_consumed = 0;
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
@@ -3250,17 +3289,17 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
mptcp_copy_ip_options(nsk, sk);
msk = mptcp_sk(nsk);
- msk->local_key = subflow_req->local_key;
- msk->token = subflow_req->token;
+ WRITE_ONCE(msk->local_key, subflow_req->local_key);
+ WRITE_ONCE(msk->token, subflow_req->token);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
- msk->write_seq = subflow_req->idsn + 1;
- msk->snd_nxt = msk->write_seq;
- msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
+ WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3363,9 +3402,6 @@ void __mptcp_data_acked(struct sock *sk)
__mptcp_clean_una(sk);
else
__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
- if (mptcp_pending_data_fin_ack(sk))
- mptcp_schedule_work(sk);
}
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
@@ -3703,6 +3739,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
+
+ WRITE_ONCE(msk->write_seq, subflow->idsn);
+ WRITE_ONCE(msk->snd_nxt, subflow->idsn);
+ WRITE_ONCE(msk->snd_una, subflow->idsn);
if (likely(!__mptcp_check_fallback(msk)))
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
@@ -3767,6 +3807,7 @@ static struct proto mptcp_prot = {
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
.forward_alloc_get = mptcp_forward_alloc_get,
+ .stream_memory_free = mptcp_stream_memory_free,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
@@ -3849,11 +3890,10 @@ unlock:
}
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *ssk, *newsk;
- int err;
pr_debug("msk=%p", msk);
@@ -3865,9 +3905,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
return -EINVAL;
pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, &err, kern);
+ newsk = inet_csk_accept(ssk, arg);
if (!newsk)
- return err;
+ return arg->err;
pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
@@ -3888,7 +3928,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
@@ -3917,7 +3957,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
}
} else {
tcpfallback:
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
/* we are being invoked after accepting a non-mp-capable
@@ -3938,12 +3978,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- if (sk_stream_is_writeable(sk))
+ if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;
- mptcp_set_nospace(sk);
- smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
- if (sk_stream_is_writeable(sk))
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */
+ if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;
return 0;
@@ -4137,7 +4177,7 @@ int __init mptcp_proto_v6_init(void)
int err;
mptcp_v6_prot = mptcp_prot;
- strcpy(mptcp_v6_prot.name, "MPTCPv6");
+ strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name));
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 07f6242afc..8357046732 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -12,8 +12,7 @@
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
-
-#include "mptcp_pm_gen.h"
+#include <net/rstreason.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -113,10 +112,9 @@
#define MPTCP_RST_TRANSIENT BIT(0)
/* MPTCP socket atomic flags */
-#define MPTCP_NOSPACE 1
-#define MPTCP_WORK_RTX 2
-#define MPTCP_FALLBACK_DONE 4
-#define MPTCP_WORK_CLOSE_SUBFLOW 5
+#define MPTCP_WORK_RTX 1
+#define MPTCP_FALLBACK_DONE 2
+#define MPTCP_WORK_CLOSE_SUBFLOW 3
/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING 1
@@ -260,8 +258,10 @@ struct mptcp_data_frag {
struct mptcp_sock {
/* inet_connection_sock must be the first member */
struct inet_connection_sock sk;
- u64 local_key;
- u64 remote_key;
+ u64 local_key; /* protected by the first subflow socket lock
+ * lockless read access
+ */
+ u64 remote_key; /* same as above */
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
@@ -281,6 +281,9 @@ struct mptcp_sock {
u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
+ u32 last_data_sent;
+ u32 last_data_recv;
+ u32 last_ack_recv;
unsigned long timer_ival;
u32 token;
int rmem_released;
@@ -306,6 +309,10 @@ struct mptcp_sock {
in_accept_queue:1,
free_first:1,
rcvspace_init:1;
+ u32 notsent_lowat;
+ int keepalive_cnt;
+ int keepalive_idle;
+ int keepalive_intvl;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -341,12 +348,30 @@ struct mptcp_sock {
#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+extern struct genl_family mptcp_genl_family;
+
static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
sock_owned_by_me((const struct sock *)msk);
}
+#ifdef CONFIG_DEBUG_NET
+/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */
+#undef tcp_sk
+#define tcp_sk(ptr) ({ \
+ typeof(ptr) _ptr = (ptr); \
+ WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \
+ container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \
+})
+#define mptcp_sk(ptr) ({ \
+ typeof(ptr) _ptr = (ptr); \
+ WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \
+ container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \
+})
+
+#else /* !CONFIG_DEBUG_NET */
#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
+#endif
/* the msk socket don't use the backlog, also account for the bulk
* free memory
@@ -400,7 +425,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+ if (msk->snd_una == msk->snd_nxt)
return NULL;
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
@@ -418,6 +443,7 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
+ request_bkup : 1,
csum_reqd : 1,
allow_join_id0 : 1;
u8 local_id;
@@ -538,7 +564,7 @@ struct mptcp_subflow_context {
static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
/* Use RCU on icsk_ulp_data only for sock diag code */
return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
@@ -558,6 +584,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
WRITE_ONCE(subflow->local_id, -1);
}
+/* Convert reset reasons in MPTCP to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_mptcp_reason(u32 reason)
+{
+ switch (reason) {
+ case MPTCP_RST_EUNSPEC:
+ return SK_RST_REASON_MPTCP_RST_EUNSPEC;
+ case MPTCP_RST_EMPTCP:
+ return SK_RST_REASON_MPTCP_RST_EMPTCP;
+ case MPTCP_RST_ERESOURCE:
+ return SK_RST_REASON_MPTCP_RST_ERESOURCE;
+ case MPTCP_RST_EPROHIBIT:
+ return SK_RST_REASON_MPTCP_RST_EPROHIBIT;
+ case MPTCP_RST_EWQ2BIG:
+ return SK_RST_REASON_MPTCP_RST_EWQ2BIG;
+ case MPTCP_RST_EBADPERF:
+ return SK_RST_REASON_MPTCP_RST_EBADPERF;
+ case MPTCP_RST_EMIDDLEBOX:
+ return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX;
+ default:
+ /* It should not happen, or else errors may occur
+ * in MPTCP layer
+ */
+ return SK_RST_REASON_ERROR;
+ }
+}
+
+static inline void
+mptcp_send_active_reset_reason(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ enum sk_rst_reason reason;
+
+ reason = sk_rst_convert_mptcp_reason(subflow->reset_reason);
+ tcp_send_active_reset(sk, GFP_ATOMIC, reason);
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -622,6 +685,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
@@ -790,14 +854,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline u32 mptcp_notsent_lowat(const struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ u32 val;
+
+ val = READ_ONCE(mptcp_sk(sk)->notsent_lowat);
+ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+}
+
+static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
+{
+ const struct mptcp_sock *msk = mptcp_sk(sk);
+ u32 notsent_bytes;
+
+ notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);
+ return (notsent_bytes << wake) < mptcp_notsent_lowat(sk);
+}
+
+static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
+{
+ return mptcp_stream_memory_free(sk, wake) &&
+ __sk_stream_is_writeable(sk, wake);
+}
+
static inline void mptcp_write_space(struct sock *sk)
{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (mptcp_stream_memory_free(sk, 1))
+ sk_stream_write_space(sk);
}
static inline void __mptcp_sync_sndbuf(struct sock *sk)
@@ -808,7 +894,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
return;
- new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0];
+ new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]);
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);
@@ -928,21 +1014,15 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
-int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup);
-int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup);
-int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
- struct mptcp_pm_addr_entry *loc,
- struct mptcp_pm_addr_entry *rem, u8 bkup);
+int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
-void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
@@ -958,6 +1038,8 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo
const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
+int mptcp_nl_fill_addr(struct sk_buff *skb,
+ struct mptcp_pm_addr_entry *entry);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
@@ -1022,6 +1104,18 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
+bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
+int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info);
+int mptcp_userspace_pm_get_addr(struct sk_buff *skb,
+ struct genl_info *info);
static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 4ab0693c06..4a7fd0508a 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -51,6 +51,28 @@ struct mptcp_sched_ops *mptcp_sched_find(const char *name)
return ret;
}
+/* Build string with list of available scheduler values.
+ * Similar to tcp_get_available_congestion_control()
+ */
+void mptcp_get_available_schedulers(char *buf, size_t maxlen)
+{
+ struct mptcp_sched_ops *sched;
+ size_t offs = 0;
+
+ rcu_read_lock();
+ spin_lock(&mptcp_sched_list_lock);
+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+ offs += snprintf(buf + offs, maxlen - offs,
+ "%s%s",
+ offs == 0 ? "" : " ", sched->name);
+
+ if (WARN_ON_ONCE(offs >= maxlen))
+ break;
+ }
+ spin_unlock(&mptcp_sched_list_lock);
+ rcu_read_unlock();
+}
+
int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
{
if (!sched->get_subflow)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index ef3edba754..f9a4fb17b5 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -181,8 +181,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
switch (optname) {
case SO_KEEPALIVE:
- mptcp_sol_socket_sync_intval(msk, optname, val);
- return 0;
case SO_DEBUG:
case SO_MARK:
case SO_PRIORITY:
@@ -618,26 +616,42 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
}
if (ret == 0)
- strcpy(msk->ca_name, name);
+ strscpy(msk->ca_name, name, sizeof(msk->ca_name));
release_sock(sk);
return ret;
}
-static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
+static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
+ int (*set_val)(struct sock *, int),
+ int *msk_val, int val)
{
struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int val;
+ int err = 0;
- if (optlen < sizeof(int))
- return -EINVAL;
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int ret;
- if (copy_from_sockptr(&val, optval, sizeof(val)))
- return -EFAULT;
+ lock_sock(ssk);
+ ret = set_val(ssk, val);
+ err = err ? : ret;
+ release_sock(ssk);
+ }
+
+ if (!err) {
+ *msk_val = val;
+ sockopt_seq_inc(msk);
+ }
+
+ return err;
+}
+
+static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
- lock_sock(sk);
sockopt_seq_inc(msk);
msk->cork = !!val;
mptcp_for_each_subflow(msk, subflow) {
@@ -649,25 +663,15 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva
}
if (!val)
mptcp_check_and_set_pending(sk);
- release_sock(sk);
return 0;
}
-static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
+static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- int val;
-
- if (optlen < sizeof(int))
- return -EINVAL;
-
- if (copy_from_sockptr(&val, optval, sizeof(val)))
- return -EFAULT;
- lock_sock(sk);
sockopt_seq_inc(msk);
msk->nodelay = !!val;
mptcp_for_each_subflow(msk, subflow) {
@@ -679,8 +683,6 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
}
if (val)
mptcp_check_and_set_pending(sk);
- release_sock(sk);
-
return 0;
}
@@ -803,25 +805,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
int ret, val;
switch (optname) {
- case TCP_INQ:
- ret = mptcp_get_int_option(msk, optval, optlen, &val);
- if (ret)
- return ret;
- if (val < 0 || val > 1)
- return -EINVAL;
-
- lock_sock(sk);
- msk->recvmsg_inq = !!val;
- release_sock(sk);
- return 0;
case TCP_ULP:
return -EOPNOTSUPP;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
- case TCP_CORK:
- return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
- case TCP_NODELAY:
- return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
case TCP_DEFER_ACCEPT:
/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
@@ -834,7 +821,50 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
optval, optlen);
}
- return -EOPNOTSUPP;
+ ret = mptcp_get_int_option(msk, optval, optlen, &val);
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+ switch (optname) {
+ case TCP_INQ:
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ msk->recvmsg_inq = !!val;
+ break;
+ case TCP_NOTSENT_LOWAT:
+ WRITE_ONCE(msk->notsent_lowat, val);
+ mptcp_write_space(sk);
+ break;
+ case TCP_CORK:
+ ret = __mptcp_setsockopt_sol_tcp_cork(msk, val);
+ break;
+ case TCP_NODELAY:
+ ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
+ break;
+ case TCP_KEEPIDLE:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
+ &tcp_sock_set_keepidle_locked,
+ &msk->keepalive_idle, val);
+ break;
+ case TCP_KEEPINTVL:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
+ &tcp_sock_set_keepintvl,
+ &msk->keepalive_intvl, val);
+ break;
+ case TCP_KEEPCNT:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
+ &tcp_sock_set_keepcnt,
+ &msk->keepalive_cnt,
+ val);
+ break;
+ default:
+ ret = -ENOPROTOOPT;
+ }
+
+ release_sock(sk);
+ return ret;
}
int mptcp_setsockopt(struct sock *sk, int level, int optname,
@@ -907,6 +937,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
struct sock *sk = (struct sock *)msk;
u32 flags = 0;
bool slow;
+ u32 now;
memset(info, 0, sizeof(*info));
@@ -935,14 +966,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
info->mptcpi_flags = flags;
- mptcp_data_lock(sk);
- info->mptcpi_snd_una = msk->snd_una;
- info->mptcpi_rcv_nxt = msk->ack_seq;
- info->mptcpi_bytes_acked = msk->bytes_acked;
- mptcp_data_unlock(sk);
slow = lock_sock_fast(sk);
- info->mptcpi_csum_enabled = msk->csum_enabled;
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
info->mptcpi_token = msk->token;
info->mptcpi_write_seq = msk->write_seq;
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
@@ -951,7 +977,17 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_retrans = msk->bytes_retrans;
info->mptcpi_subflows_total = info->mptcpi_subflows +
__mptcp_has_initial_subflow(msk);
+ now = tcp_jiffies32;
+ info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent);
+ info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv);
unlock_sock_fast(sk, slow);
+
+ mptcp_data_lock(sk);
+ info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv);
+ info->mptcpi_snd_una = msk->snd_una;
+ info->mptcpi_rcv_nxt = msk->ack_seq;
+ info->mptcpi_bytes_acked = msk->bytes_acked;
+ mptcp_data_unlock(sk);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -963,6 +999,10 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
if (get_user(len, optlen))
return -EFAULT;
+ /* When used only to check if a fallback to TCP happened. */
+ if (len == 0)
+ return 0;
+
len = min_t(unsigned int, len, sizeof(struct mptcp_info));
mptcp_diag_fill_info(msk, &m_info);
@@ -1331,6 +1371,8 @@ static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
+ struct sock *sk = (void *)msk;
+
switch (optname) {
case TCP_ULP:
case TCP_CONGESTION:
@@ -1349,6 +1391,22 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
case TCP_NODELAY:
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
+ case TCP_KEEPIDLE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_idle ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ);
+ case TCP_KEEPINTVL:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_intvl ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ);
+ case TCP_KEEPCNT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_cnt ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes));
+ case TCP_NOTSENT_LOWAT:
+ return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
+ case TCP_IS_MPTCP:
+ return mptcp_put_int_option(msk, optval, optlen, 1);
}
return -EOPNOTSUPP;
}
@@ -1464,6 +1522,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
+ tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
+ tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
+ tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
@@ -1530,7 +1591,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, space);
- tcp_sk(ssk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
unlock_sock_fast(ssk, slow);
}
return 0;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 13f66d11b7..c330946384 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -15,13 +15,12 @@
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
-#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
-#include <uapi/linux/mptcp.h>
+
#include "protocol.h"
#include "mib.h"
@@ -75,7 +74,8 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
- subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
@@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
return NULL;
}
subflow_req->local_id = local_id;
+ subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req);
return msk;
}
@@ -151,8 +152,10 @@ static int subflow_check_req(struct request_sock *req,
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
return -EINVAL;
+ }
#endif
mptcp_get_options(skb, &mp_opt);
@@ -166,6 +169,9 @@ static int subflow_check_req(struct request_sock *req,
return 0;
} else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
+
+ if (mp_opt.backup)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
}
if (opt_mp_capable && listener->request_mptcp) {
@@ -220,6 +226,7 @@ again:
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
return -EPERM;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
@@ -228,10 +235,12 @@ again:
subflow_req_create_thmac(subflow_req);
if (unlikely(req->syncookie)) {
- if (mptcp_can_accept_new_subflow(subflow_req->msk))
- subflow_init_req_cookie_join_save(subflow_req, skb);
- else
+ if (!mptcp_can_accept_new_subflow(subflow_req->msk)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
return -EPERM;
+ }
+
+ subflow_init_req_cookie_join_save(subflow_req, skb);
}
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
@@ -282,10 +291,21 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
+static enum sk_rst_reason mptcp_get_rst_reason(const struct sk_buff *skb)
+{
+ const struct mptcp_ext *mpext = mptcp_get_ext(skb);
+
+ if (!mpext)
+ return SK_RST_REASON_NOT_SPECIFIED;
+
+ return sk_rst_convert_mptcp_reason(mpext->reset_reason);
+}
+
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
struct dst_entry *dst;
int err;
@@ -293,7 +313,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
- dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
+ dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
@@ -303,7 +323,8 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp_request_sock_ops.send_reset(sk, skb);
+ tcp_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
@@ -352,7 +373,8 @@ static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
struct dst_entry *dst;
int err;
@@ -360,7 +382,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
- dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
+ dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
@@ -370,7 +392,8 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp6_request_sock_ops.send_reset(sk, skb);
+ tcp6_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
#endif
@@ -406,7 +429,7 @@ void mptcp_subflow_reset(struct sock *ssk)
/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
mptcp_schedule_work(sk);
@@ -558,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+ if (subflow->backup)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);
+
if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
@@ -595,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk)
return err;
subflow_set_local_id(subflow, err);
+ subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk);
+
return 0;
}
@@ -714,7 +742,8 @@ static bool subflow_hmac_valid(const struct request_sock *req,
if (!msk)
return false;
- subflow_generate_hmac(msk->remote_key, msk->local_key,
+ subflow_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
@@ -774,6 +803,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
+ enum sk_rst_reason reason;
struct mptcp_sock *owner;
struct sock *child;
@@ -873,13 +903,18 @@ create_child:
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
- if (!mptcp_finish_join(child))
+ if (!mptcp_finish_join(child)) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(child);
+
+ subflow_add_reset_reason(skb, subflow->reset_reason);
goto dispose_child;
+ }
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
@@ -887,7 +922,7 @@ create_child:
}
/* check for expected invariant - should never trigger, just help
- * catching eariler subtle bugs
+ * catching earlier subtle bugs
*/
WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
(!mptcp_subflow_ctx(child) ||
@@ -899,7 +934,8 @@ dispose_child:
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
- req->rsk_ops->send_reset(sk, skb);
+ reason = mptcp_get_rst_reason(skb);
+ req->rsk_ops->send_reset(sk, skb, reason);
/* The last child reference will be released by the caller */
return child;
@@ -1092,6 +1128,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
if (mpext->data_fin == 1) {
+ u64 data_fin_seq;
+
if (data_len == 1) {
bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
mpext->dsn64);
@@ -1104,26 +1142,26 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
*/
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
- } else {
- if (updated)
- mptcp_schedule_work((struct sock *)msk);
-
- return MAPPING_DATA_FIN;
}
- } else {
- u64 data_fin_seq = mpext->data_seq + data_len - 1;
- /* If mpext->data_seq is a 32-bit value, data_fin_seq
- * must also be limited to 32 bits.
- */
- if (!mpext->dsn64)
- data_fin_seq &= GENMASK_ULL(31, 0);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
- mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
- pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
- data_fin_seq, mpext->dsn64);
+ return MAPPING_DATA_FIN;
}
+ data_fin_seq = mpext->data_seq + data_len - 1;
+
+ /* If mpext->data_seq is a 32-bit value, data_fin_seq must also
+ * be limited to 32 bits.
+ */
+ if (!mpext->dsn64)
+ data_fin_seq &= GENMASK_ULL(31, 0);
+
+ mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
+ pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+ data_fin_seq, mpext->dsn64);
+
/* Adjust for DATA_FIN using 1 byte of sequence space */
data_len--;
}
@@ -1192,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
- u32 incr;
+ struct tcp_sock *tp = tcp_sk(ssk);
+ u32 offset, incr, avail_len;
- incr = limit >= skb->len ? skb->len + fin : limit;
+ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
+ if (WARN_ON_ONCE(offset > skb->len))
+ goto out;
- pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
- subflow->map_subflow_seq);
+ avail_len = skb->len - offset;
+ incr = limit >= avail_len ? avail_len + fin : limit;
+
+ pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len,
+ offset, subflow->map_subflow_seq);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
tcp_sk(ssk)->copied_seq += incr;
+
+out:
if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
sk_eat_skb(ssk, skb);
if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
@@ -1234,7 +1280,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
- /* greceful failure can happen only on the MPC subflow */
+ /* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
return;
@@ -1336,7 +1382,7 @@ reset:
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
WRITE_ONCE(subflow->data_avail, false);
return false;
}
@@ -1550,8 +1596,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key_valid = 1;
- subflow->remote_key = msk->remote_key;
- subflow->local_key = msk->local_key;
+ subflow->remote_key = READ_ONCE(msk->remote_key);
+ subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
@@ -1976,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
+ new_ctx->request_bkup = subflow_req->request_bkup;
WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index bfff53e668..4fc39fa2e2 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -52,14 +52,19 @@ static struct mptcp_subflow_context *build_ctx(struct kunit *test)
static struct mptcp_sock *build_msk(struct kunit *test)
{
struct mptcp_sock *msk;
+ struct sock *sk;
msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
sock_net_set((struct sock *)msk, &init_net);
+ sk = (struct sock *)msk;
+
/* be sure the token helpers can dereference sk->sk_prot */
- ((struct sock *)msk)->sk_prot = &tcp_prot;
+ sk->sk_prot = &tcp_prot;
+ sk->sk_protocol = IPPROTO_MPTCP;
+
return msk;
}
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 374412ed78..ef0f8f7382 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -325,6 +325,7 @@ struct ncsi_dev_priv {
spinlock_t lock; /* Protect the NCSI device */
unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
+ unsigned int channel_probe_id;/* Current channel ID during probe */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
struct ncsi_request requests[256]; /* Request table */
@@ -343,6 +344,7 @@ struct ncsi_dev_priv {
bool multi_package; /* Enable multiple packages */
bool mlx_multi_host; /* Enable multi host Mellanox */
u32 package_whitelist; /* Packages to configure */
+ unsigned char channel_count; /* Num of channels to probe */
};
struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 745c788f1d..5ecf611c88 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
break;
case ncsi_dev_state_suspend_gls:
- ndp->pending_req_num = np->channel_num;
+ ndp->pending_req_num = 1;
nca.type = NCSI_PKT_CMD_GLS;
nca.package = np->id;
+ nca.channel = ndp->channel_probe_id;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ ndp->channel_probe_id++;
- nd->state = ncsi_dev_state_suspend_dcnt;
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- nca.channel = nc->id;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
+ if (ndp->channel_probe_id == ndp->channel_count) {
+ ndp->channel_probe_id = 0;
+ nd->state = ncsi_dev_state_suspend_dcnt;
}
break;
@@ -1345,7 +1347,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
struct ncsi_package *np;
- struct ncsi_channel *nc;
struct ncsi_cmd_arg nca;
unsigned char index;
int ret;
@@ -1423,23 +1424,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
- case ncsi_dev_state_probe_cis:
- ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
-
- /* Clear initial state */
- nca.type = NCSI_PKT_CMD_CIS;
- nca.package = ndp->active_package->id;
- for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
- nca.channel = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
- nd->state = ncsi_dev_state_probe_gvi;
- if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
- nd->state = ncsi_dev_state_probe_keep_phy;
- break;
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1452,14 +1436,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
+ case ncsi_dev_state_probe_cis:
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
np = ndp->active_package;
- ndp->pending_req_num = np->channel_num;
+ ndp->pending_req_num = 1;
- /* Retrieve version, capability or link status */
- if (nd->state == ncsi_dev_state_probe_gvi)
+ /* Clear initial state, or retrieve version, capability or link status */
+ if (nd->state == ncsi_dev_state_probe_cis)
+ nca.type = NCSI_PKT_CMD_CIS;
+ else if (nd->state == ncsi_dev_state_probe_gvi)
nca.type = NCSI_PKT_CMD_GVI;
else if (nd->state == ncsi_dev_state_probe_gc)
nca.type = NCSI_PKT_CMD_GC;
@@ -1467,19 +1454,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nca.type = NCSI_PKT_CMD_GLS;
nca.package = np->id;
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- nca.channel = nc->id;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
+ nca.channel = ndp->channel_probe_id;
- if (nd->state == ncsi_dev_state_probe_gvi)
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ if (nd->state == ncsi_dev_state_probe_cis) {
+ nd->state = ncsi_dev_state_probe_gvi;
+ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0)
+ nd->state = ncsi_dev_state_probe_keep_phy;
+ } else if (nd->state == ncsi_dev_state_probe_gvi) {
nd->state = ncsi_dev_state_probe_gc;
- else if (nd->state == ncsi_dev_state_probe_gc)
+ } else if (nd->state == ncsi_dev_state_probe_gc) {
nd->state = ncsi_dev_state_probe_gls;
- else
+ } else {
+ nd->state = ncsi_dev_state_probe_cis;
+ ndp->channel_probe_id++;
+ }
+
+ if (ndp->channel_probe_id == ndp->channel_count) {
+ ndp->channel_probe_id = 0;
nd->state = ncsi_dev_state_probe_dp;
+ }
break;
case ncsi_dev_state_probe_dp:
ndp->pending_req_num = 1;
@@ -1780,6 +1777,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->requests[i].ndp = ndp;
timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
}
+ ndp->channel_count = NCSI_RESERVED_CHANNEL;
spin_lock_irqsave(&ncsi_dev_lock, flags);
list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
@@ -1813,6 +1811,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
if (!(ndp->flags & NCSI_DEV_PROBED)) {
ndp->package_probe_id = 0;
+ ndp->channel_probe_id = 0;
nd->state = ncsi_dev_state_probe;
schedule_work(&ndp->work);
return 0;
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index bee290d0f4..e28be33bdf 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
struct ncsi_rsp_gc_pkt *rsp;
struct ncsi_dev_priv *ndp = nr->ndp;
struct ncsi_channel *nc;
+ struct ncsi_package *np;
size_t size;
/* Find the channel */
rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
- NULL, &nc);
+ &np, &nc);
if (!nc)
return -ENODEV;
@@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
*/
nc->vlan_filter.bitmap = U64_MAX;
nc->vlan_filter.n_vids = rsp->vlan_cnt;
+ np->ndp->channel_count = rsp->channel_cnt;
return 0;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 441d1f1341..df2dc21304 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
config NETFILTER_XT_TARGET_CT
tristate '"CT" target support'
depends on NF_CONNTRACK
- depends on IP_NF_RAW || IP6_NF_RAW
+ depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This options adds a `CT' target, which allows to specify initial
@@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" and "TOS" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `DSCP' target, which allows you to manipulate
@@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP
config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
@@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
@@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS
config NETFILTER_XT_TARGET_TCPOPTSTRIP
tristate '"TCPOPTSTRIP" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a "TCPOPTSTRIP" target, which allows you to strip
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d4958e7e76..614815a3ed 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -101,7 +101,7 @@ endif
endif
ifdef CONFIG_NFT_CT
-ifdef CONFIG_RETPOLINE
+ifdef CONFIG_MITIGATION_RETPOLINE
nf_tables-objs += nft_ct_fast.o
endif
endif
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 3126911f50..b00fc285b3 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -815,12 +815,21 @@ int __init netfilter_init(void)
if (ret < 0)
goto err;
+#ifdef CONFIG_LWTUNNEL
+ ret = netfilter_lwtunnel_init();
+ if (ret < 0)
+ goto err_lwtunnel_pernet;
+#endif
ret = netfilter_log_init();
if (ret < 0)
- goto err_pernet;
+ goto err_log_pernet;
return 0;
-err_pernet:
+err_log_pernet:
+#ifdef CONFIG_LWTUNNEL
+ netfilter_lwtunnel_fini();
+err_lwtunnel_pernet:
+#endif
unregister_pernet_subsys(&netfilter_net_ops);
err:
return ret;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3184cc6be4..61431690cb 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex or ip_set_ref_lock is held: */
-#define ip_set_dereference(p) \
- rcu_dereference_protected(p, \
+#define ip_set_dereference(inst) \
+ rcu_dereference_protected((inst)->ip_set_list, \
lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
- lockdep_is_held(&ip_set_ref_lock))
+ lockdep_is_held(&ip_set_ref_lock) || \
+ (inst)->is_deleted)
#define ip_set(inst, id) \
- ip_set_dereference((inst)->ip_set_list)[id]
+ ip_set_dereference(inst)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
#define ip_set_dereference_nfnl(p) \
@@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = ip_set_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
@@ -1172,23 +1173,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
+/* In order to return quickly when destroying a single set, it is split
+ * into two stages:
+ * - Cancel garbage collector
+ * - Destroy the set itself via call_rcu()
+ */
+
static void
-ip_set_destroy_set(struct ip_set *set)
+ip_set_destroy_set_rcu(struct rcu_head *head)
{
- pr_debug("set: %s\n", set->name);
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
- /* Must call it without holding any lock */
set->variant->destroy(set);
module_put(set->type->me);
kfree(set);
}
static void
-ip_set_destroy_set_rcu(struct rcu_head *head)
+_destroy_all_sets(struct ip_set_net *inst)
{
- struct ip_set *set = container_of(head, struct ip_set, rcu);
+ struct ip_set *set;
+ ip_set_id_t i;
+ bool need_wait = false;
- ip_set_destroy_set(set);
+ /* First cancel gc's: list:set sets are flushed as well */
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set) {
+ set->variant->cancel_gc(set);
+ if (set->type->features & IPSET_TYPE_NAME)
+ need_wait = true;
+ }
+ }
+ /* Must wait for flush to be really finished */
+ if (need_wait)
+ rcu_barrier();
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
+ set->variant->destroy(set);
+ module_put(set->type->me);
+ kfree(set);
+ }
+ }
}
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
@@ -1202,11 +1230,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
-
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
- * ip_set_put|get_nfnl_* functions, that way we
+ * ip_set_nfnl_get_* functions, that way we
* can safely check references here.
*
* list:set timer can only decrement the reference
@@ -1214,8 +1241,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* without holding the lock.
*/
if (!attr[IPSET_ATTR_SETNAME]) {
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
@@ -1226,15 +1251,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
}
inst->is_destroyed = true;
read_unlock_bh(&ip_set_ref_lock);
- for (i = 0; i < inst->ip_set_max; i++) {
- s = ip_set(inst, i);
- if (s) {
- ip_set(inst, i) = NULL;
- /* Must cancel garbage collectors */
- s->variant->cancel_gc(s);
- ip_set_destroy_set(s);
- }
- }
+ _destroy_all_sets(inst);
/* Modified by ip_set_destroy() only, which is serialized */
inst->is_destroyed = false;
} else {
@@ -1255,12 +1272,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
if (features & IPSET_TYPE_NAME) {
/* Must wait for flush to be really finished */
rcu_barrier();
}
- /* Must cancel garbage collectors */
- s->variant->cancel_gc(s);
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
@@ -2365,30 +2382,25 @@ ip_set_net_init(struct net *net)
}
static void __net_exit
-ip_set_net_exit(struct net *net)
+ip_set_net_pre_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);
- struct ip_set *set = NULL;
- ip_set_id_t i;
-
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
+}
- nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < inst->ip_set_max; i++) {
- set = ip_set(inst, i);
- if (set) {
- ip_set(inst, i) = NULL;
- set->variant->cancel_gc(set);
- ip_set_destroy_set(set);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_IPSET);
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ _destroy_all_sets(inst);
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
+ .pre_exit = ip_set_net_pre_exit,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
.size = sizeof(struct ip_set_net),
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6c3f28bc59..bfae706693 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct list_set *map = set->data;
struct set_adt_elem *d = value;
struct set_elem *e, *next, *prev = NULL;
- int ret;
+ int ret = 0;
- list_for_each_entry(e, &map->members, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (d->before == 0) {
ret = 1;
+ goto out;
} else if (d->before > 0) {
next = list_next_entry(e, list);
ret = !list_is_last(&e->list, &map->members) &&
@@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
} else {
ret = prev && prev->id == d->refid;
}
- return ret;
+ goto out;
}
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static void
@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
/* Find where to add the new entry */
n = prev = next = NULL;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e, *next, *prev = NULL;
+ struct set_elem *e, *n, *next, *prev = NULL;
- list_for_each_entry(e, &map->members, list) {
+ list_for_each_entry_safe(e, n, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -424,14 +428,8 @@ static void
list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e, *n;
- list_for_each_entry_safe(e, n, &map->members, list) {
- list_del(&e->list);
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- kfree(e);
- }
+ WARN_ON_ONCE(!list_empty(&map->members));
kfree(map);
set->data = NULL;
@@ -549,6 +547,9 @@ list_set_cancel_gc(struct ip_set *set)
if (SET_WITH_TIMEOUT(set))
timer_shutdown_sync(&map->gc);
+
+ /* Flush list to drop references to other ipsets */
+ list_set_flush(set);
}
static const struct ip_set_type_variant set_variant = {
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a743db0738..98d7dbe3d7 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
/* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
- sizeof(struct ip_vs_conn), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN);
if (!ip_vs_conn_cachep) {
kvfree(ip_vs_conn_tab);
return -ENOMEM;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a2c16b5010..c7a8a08b73 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1550,6 +1550,7 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (!dest)
goto unk;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 type;
/* Only support version 0 and C (csum) */
@@ -1560,7 +1561,10 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (type != htons(ETH_P_IP))
goto unk;
*proto = IPPROTO_IPIP;
- return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+
+ gre_flags_to_tnl_flags(flags, greh->flags);
+
+ return gre_calc_hlen(flags);
}
unk:
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 143a341bbc..f4384e147e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -94,6 +94,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
int availmem;
+ int amemthresh;
int nomem;
int to_change = -1;
@@ -105,7 +106,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < ipvs->sysctl_amemthresh);
+ amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
+ nomem = (availmem < amemthresh);
local_bh_disable();
@@ -145,9 +147,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 1:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
ipvs->sysctl_drop_packet = 2;
} else {
ipvs->drop_rate = 0;
@@ -155,9 +156,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 2:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
} else {
ipvs->drop_rate = 0;
ipvs->sysctl_drop_packet = 1;
@@ -1459,18 +1459,18 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
if (ret < 0)
goto out_err;
- /* Bind the ct retriever */
- RCU_INIT_POINTER(svc->pe, pe);
- pe = NULL;
-
/* Update the virtual service counters */
if (svc->port == FTPPORT)
atomic_inc(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
- if (svc->pe && svc->pe->conn_out)
+ if (pe && pe->conn_out)
atomic_inc(&ipvs->conn_out_counter);
+ /* Bind the ct retriever */
+ RCU_INIT_POINTER(svc->pe, pe);
+ pe = NULL;
+
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services++;
@@ -2263,7 +2263,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
- { }
};
#endif
@@ -4270,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct ctl_table *tbl;
int idx, ret;
size_t ctl_table_size = ARRAY_SIZE(vs_vars);
+ bool unpriv = net->user_ns != &init_user_ns;
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4284,12 +4284,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- ctl_table_size = 0;
- }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4315,10 +4309,17 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_ports = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ports;
tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
+
ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+
ipvs->sysctl_sync_sock_size = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
+
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
@@ -4341,15 +4342,22 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+
ipvs->sysctl_run_estimation = 1;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_run_estimation;
ipvs->est_cpulist_valid = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_cpulist;
ipvs->sysctl_est_nice = IPVS_EST_NICE;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_nice;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 8ceec7a2fa..2423513d70 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblc_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblc_ctl_table = vs_vars_table;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0fb6470721..cdb1d4bf67 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblcr_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1e689c7141..83e4529164 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 65e0259178..3313bceb6c 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(addr_type & IPV6_ADDR_LOOPBACK);
old_rt_is_local = __ip_vs_is_local_route6(
- (struct rt6_info *)skb_dst(skb));
+ dst_rt6_info(skb_dst(skb)));
} else
#endif
{
@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rtable *) dest_dst->dst_cache;
+ rt = dst_rtable(dest_dst->dst_cache);
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
@@ -390,10 +390,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < 68) {
@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rt6_info *) dest_dst->dst_cache;
+ rt = dst_rt6_info(dest_dst->dst_cache);
else {
u32 cookie;
@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
rt_mode);
if (!dst)
goto err_unreach;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
}
local = __ip_vs_is_local_route6(rt);
@@ -553,10 +553,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < IPV6_MIN_MTU) {
@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_RDR);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1082,11 +1082,11 @@ ipvs_gre_encap(struct net *net, struct sk_buff *skb,
{
__be16 proto = *next_protocol == IPPROTO_IPIP ?
htons(ETH_P_IP) : htons(ETH_P_IPV6);
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t hdrlen;
if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
hdrlen = gre_calc_hlen(tflags);
gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
@@ -1165,11 +1165,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
tdev = rt->dst.dev;
/*
@@ -1310,11 +1310,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 0e4beae421..5257d5e7eb 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
static const struct bpf_func_proto *
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
const struct bpf_verifier_ops netfilter_verifier_ops = {
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 5d8ed6c90b..8715617b02 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void)
for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
- conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
- sizeof(struct nf_conncount_tuple),
- 0, 0, NULL);
+ conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0);
if (!conncount_conn_cachep)
return -ENOMEM;
- conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
- sizeof(struct nf_conncount_rb),
- 0, 0, NULL);
+ conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0);
if (!conncount_rb_cachep) {
kmem_cache_destroy(conncount_conn_cachep);
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 475358ec82..d2492d050f 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
__bpf_kfunc_end_defs();
-BTF_SET8_START(nf_ct_kfunc_set)
+BTF_KFUNCS_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
@@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
-BTF_SET8_END(nf_ct_kfunc_set)
+BTF_KFUNCS_END(nf_ct_kfunc_set)
static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5b876fa7f9..7ac20750c1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1440,8 +1440,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
u8 protonum = nf_ct_protonum(ct);
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
- return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
@@ -2024,7 +2022,7 @@ repeat:
goto repeat;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
+ if (ret == NF_DROP)
NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = -ret;
@@ -2530,7 +2528,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
* netfilter framework. Roll on, two-stage module
* delete...
*/
- synchronize_net();
+ synchronize_rcu_expedited();
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 81ca348915..21fa550966 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -722,9 +722,7 @@ int nf_conntrack_expect_init(void)
nf_ct_expect_hsize = 1;
}
nf_ct_expect_max = nf_ct_expect_hsize * 4;
- nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
- sizeof(struct nf_conntrack_expect),
- 0, 0, NULL);
+ nf_ct_expect_cachep = KMEM_CACHE(nf_conntrack_expect, 0);
if (!nf_ct_expect_cachep)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3b846cbdc0..4cbf71d078 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3420,7 +3420,8 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
- if (ntohl(id) != (u32)(unsigned long)exp) {
+
+ if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
return -ENOENT;
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e2db1f4ec2..ebc4f733bb 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -525,7 +525,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
@@ -533,7 +533,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
/* pull again, including possible 48 bit sequences and subtype header */
dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 1020d67600..327b805902 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = {
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
- [ICMPV6_MLD2_REPORT - 130] = 1
+ [ICMPV6_MLD2_REPORT - 130] = 1,
+ [ICMPV6_MRDISC_ADV - 130] = 1,
+ [ICMPV6_MRDISC_SOL - 130] = 1
};
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0ee98ce5b8..6c40bdf8b0 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,9 +22,6 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
-#ifdef CONFIG_LWTUNNEL
-#include <net/netfilter/nf_hooks_lwtunnel.h>
-#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -612,15 +609,10 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
-#ifdef CONFIG_LWTUNNEL
- NF_SYSCTL_CT_LWTUNNEL,
-#endif
- __NF_SYSCTL_CT_LAST_SYSCTL,
+ NF_SYSCTL_CT_LAST_SYSCTL,
};
-#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
-
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
@@ -948,16 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
-#ifdef CONFIG_LWTUNNEL
- [NF_SYSCTL_CT_LWTUNNEL] = {
- .procname = "nf_hooks_lwtunnel",
- .data = NULL,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
- },
-#endif
- {}
};
static struct ctl_table nf_ct_netfilter_table[] = {
@@ -968,7 +950,6 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
@@ -1122,7 +1103,7 @@ out_unregister_netfilter:
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = cnet->sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(cnet->sysctl_header);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a057133923..5c1ff07eae 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc);
static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
- const struct rt6_info *rt;
-
- if (flow_tuple->l3proto == NFPROTO_IPV6) {
- rt = (const struct rt6_info *)flow_tuple->dst_cache;
- return rt6_get_cookie(rt);
- }
+ if (flow_tuple->l3proto == NFPROTO_IPV6)
+ return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
return 0;
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 5383bed3d3..c2c005234d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
index 00e89ffd78..d8ebebc977 100644
--- a/net/netfilter/nf_hooks_lwtunnel.c
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -3,6 +3,9 @@
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
+#include <linux/netfilter.h>
+
+#include "nf_internals.h"
static inline int nf_hooks_lwtunnel_get(void)
{
@@ -50,4 +53,71 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+
+static struct ctl_table nf_lwtunnel_sysctl_table[] = {
+ {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+};
+
+static int __net_init nf_lwtunnel_net_init(struct net *net)
+{
+ struct ctl_table_header *hdr;
+ struct ctl_table *table;
+
+ table = nf_lwtunnel_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_lwtunnel_sysctl_table,
+ sizeof(nf_lwtunnel_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ }
+
+ hdr = register_net_sysctl_sz(net, "net/netfilter", table,
+ ARRAY_SIZE(nf_lwtunnel_sysctl_table));
+ if (!hdr)
+ goto err_reg;
+
+ net->nf.nf_lwtnl_dir_header = hdr;
+
+ return 0;
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit nf_lwtunnel_net_exit(struct net *net)
+{
+ const struct ctl_table *table;
+
+ table = net->nf.nf_lwtnl_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct pernet_operations nf_lwtunnel_net_ops = {
+ .init = nf_lwtunnel_net_init,
+ .exit = nf_lwtunnel_net_exit,
+};
+
+int __init netfilter_lwtunnel_init(void)
+{
+ return register_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+
+void netfilter_lwtunnel_fini(void)
+{
+ unregister_pernet_subsys(&nf_lwtunnel_net_ops);
+}
+#else
+int __init netfilter_lwtunnel_init(void) { return 0; }
+void netfilter_lwtunnel_fini(void) {}
#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 832ae64179..2540302306 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
int __init netfilter_log_init(void);
+#ifdef CONFIG_LWTUNNEL
+/* nf_hooks_lwtunnel.c */
+int __init netfilter_lwtunnel_init(void);
+void netfilter_lwtunnel_fini(void);
+#endif
+
/* core.c */
void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
const struct nf_hook_ops *reg);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index e16f158388..769fd7680f 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -31,10 +31,10 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
int i;
for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
- if (loggers[pf][i] == NULL)
+ log = nft_log_dereference(loggers[pf][i]);
+ if (!log)
continue;
- log = nft_log_dereference(loggers[pf][i]);
if (!strncasecmp(str_logger, log->name, strlen(log->name)))
return log;
}
@@ -156,6 +156,11 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
struct nf_logger *logger;
int ret = -ENOENT;
+ if (pf >= ARRAY_SIZE(loggers))
+ return -EINVAL;
+ if (type >= NF_LOG_TYPE_MAX)
+ return -EINVAL;
+
if (pf == NFPROTO_INET) {
ret = nf_logger_find_get(NFPROTO_IPV4, type);
if (ret < 0)
@@ -390,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
-static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
+static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO];
static struct ctl_table_header *nf_log_sysctl_fhdr;
static struct ctl_table nf_log_sysctl_ftable[] = {
@@ -401,7 +406,6 @@ static struct ctl_table nf_log_sysctl_ftable[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int nf_log_proc_dostring(struct ctl_table *table, int write,
@@ -509,7 +513,7 @@ err_alloc:
static void netfilter_log_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->nf.nf_log_dir_header->ctl_table_arg;
unregister_net_sysctl_table(net->nf.nf_log_dir_header);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 6e3b2f5885..481be15609 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
__bpf_kfunc_end_defs();
-BTF_SET8_START(nf_nat_kfunc_set)
+BTF_KFUNCS_START(nf_nat_kfunc_set)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
-BTF_SET8_END(nf_nat_kfunc_set)
+BTF_KFUNCS_END(nf_nat_kfunc_set)
static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index e2f334f702..7f12e56e6e 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -248,109 +248,3 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
return 0;
}
EXPORT_SYMBOL_GPL(nf_queue);
-
-static unsigned int nf_iterate(struct sk_buff *skb,
- struct nf_hook_state *state,
- const struct nf_hook_entries *hooks,
- unsigned int *index)
-{
- const struct nf_hook_entry *hook;
- unsigned int verdict, i = *index;
-
- while (i < hooks->num_hook_entries) {
- hook = &hooks->hooks[i];
-repeat:
- verdict = nf_hook_entry_hookfn(hook, skb, state);
- if (verdict != NF_ACCEPT) {
- *index = i;
- if (verdict != NF_REPEAT)
- return verdict;
- goto repeat;
- }
- i++;
- }
-
- *index = i;
- return NF_ACCEPT;
-}
-
-static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
-{
- switch (pf) {
-#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
- case NFPROTO_BRIDGE:
- return rcu_dereference(net->nf.hooks_bridge[hooknum]);
-#endif
- case NFPROTO_IPV4:
- return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
- case NFPROTO_IPV6:
- return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-
- return NULL;
-}
-
-/* Caller must hold rcu read-side lock */
-void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
-{
- const struct nf_hook_entry *hook_entry;
- const struct nf_hook_entries *hooks;
- struct sk_buff *skb = entry->skb;
- const struct net *net;
- unsigned int i;
- int err;
- u8 pf;
-
- net = entry->state.net;
- pf = entry->state.pf;
-
- hooks = nf_hook_entries_head(net, pf, entry->state.hook);
-
- i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
- kfree_skb(skb);
- nf_queue_entry_free(entry);
- return;
- }
-
- hook_entry = &hooks->hooks[i];
-
- /* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT)
- verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
-
- if (verdict == NF_ACCEPT) {
- if (nf_reroute(skb, entry) < 0)
- verdict = NF_DROP;
- }
-
- if (verdict == NF_ACCEPT) {
-next_hook:
- ++i;
- verdict = nf_iterate(skb, &entry->state, hooks, &i);
- }
-
- switch (verdict & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_STOP:
- local_bh_disable();
- entry->state.okfn(entry->state.net, entry->state.sk, skb);
- local_bh_enable();
- break;
- case NF_QUEUE:
- err = nf_queue(skb, &entry->state, i, verdict);
- if (err == 1)
- goto next_hook;
- break;
- case NF_STOLEN:
- break;
- default:
- kfree_skb(skb);
- }
-
- nf_queue_entry_free(entry);
-}
-EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index fbbc4fd373..5b140c12b7 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
skb_reset_network_header(skb);
iph = skb_put(skb, sizeof(*iph));
ip6_flow_hdr(iph, 0, 0);
- iph->hop_limit = net->ipv6.devconf_all->hop_limit;
+ iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit);
iph->nexthdr = IPPROTO_TCP;
iph->saddr = *saddr;
iph->daddr = *daddr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0e697e53a7..91cc3a81ba 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1203,8 +1203,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
- __NFT_TABLE_F_WAS_AWAKEN)
+ __NFT_TABLE_F_WAS_AWAKEN | \
+ __NFT_TABLE_F_WAS_ORPHAN)
static bool nft_table_pending_update(const struct nft_ctx *ctx)
{
@@ -1244,8 +1246,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if ((nft_table_has_owner(ctx->table) &&
!(flags & NFT_TABLE_F_OWNER)) ||
- (!nft_table_has_owner(ctx->table) &&
- flags & NFT_TABLE_F_OWNER))
+ (flags & NFT_TABLE_F_OWNER &&
+ !nft_table_is_orphan(ctx->table)))
+ return -EOPNOTSUPP;
+
+ if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
@@ -1274,6 +1279,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
}
}
+ if ((flags & NFT_TABLE_F_OWNER) &&
+ !nft_table_has_owner(ctx->table)) {
+ ctx->table->nlpid = ctx->portid;
+ ctx->table->flags |= NFT_TABLE_F_OWNER |
+ __NFT_TABLE_F_WAS_ORPHAN;
+ }
+
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -3321,7 +3333,7 @@ err_expr_parse:
return ERR_PTR(err);
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp)
{
int err;
@@ -3329,7 +3341,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
return -EINVAL;
dst->ops = src->ops;
- err = src->ops->clone(dst, src);
+ err = src->ops->clone(dst, src, gfp);
if (err < 0)
return err;
@@ -3811,6 +3823,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/** nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3832,6 +3853,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively,
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -4291,23 +4315,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
* given, in that case the amount of memory per element is used.
*/
static const struct nft_set_ops *
-nft_select_set_ops(const struct nft_ctx *ctx,
- const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx, u32 flags,
const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
- u32 flags = 0;
int i;
lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
- if (nla[NFTA_SET_FLAGS] != NULL)
- flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-
bops = NULL;
best.size = ~0;
best.lookup = ~0;
@@ -5060,9 +5079,6 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
(NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
return -EOPNOTSUPP;
- if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
- (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
- return -EOPNOTSUPP;
}
desc.dtype = 0;
@@ -5202,7 +5218,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc);
+ ops = nft_select_set_ops(&ctx, flags, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -5736,8 +5752,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
- set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
- set->dlen) < 0)
+ nft_set_datatype(set), set->dlen) < 0)
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
@@ -6521,7 +6536,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
if (!expr)
goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]);
+ err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT);
if (err < 0) {
kfree(expr);
goto err_expr;
@@ -6560,7 +6575,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- err = nft_expr_clone(expr, expr_array[i]);
+ err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT);
if (err < 0)
goto err_elem_expr_setup;
@@ -7772,6 +7787,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (WARN_ON_ONCE(!type))
return -ENOENT;
+ if (!obj->ops->update)
+ return 0;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -9463,9 +9481,10 @@ static void nft_obj_commit_update(struct nft_trans *trans)
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- if (obj->ops->update)
- obj->ops->update(obj, newobj);
+ if (WARN_ON_ONCE(!obj->ops->update))
+ return;
+ obj->ops->update(obj, newobj);
nft_obj_destroy(&trans->ctx, newobj);
}
@@ -10537,6 +10556,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
} else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
}
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
+ trans->ctx.table->nlpid = 0;
+ }
trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
@@ -10798,150 +10821,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_elem_priv *elem_priv)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
-
- if (!nft_set_elem_active(ext, iter->genmask))
- return 0;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- if (fatal_signal_pending(current))
- return -EINTR;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -11054,13 +10933,16 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
return 0;
default:
+ if (type != NFT_DATA_VALUE)
+ return -EINVAL;
+
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
if (len == 0)
@@ -11069,8 +10951,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- if (data != NULL && type != NFT_DATA_VALUE)
- return -EINVAL;
return 0;
}
}
@@ -11471,12 +11351,15 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nf_tables_destroy_list))
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
+ if (table->flags & NFT_TABLE_F_PERSIST) {
+ table->flags &= ~NFT_TABLE_F_OWNER;
+ continue;
+ }
__nft_release_hook(net, table);
list_del_rcu(&table->list);
to_delete[deleted++] = table;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index c3e6353647..a48d5f0e2f 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,7 +21,7 @@
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nft_meta.h>
-#if defined(CONFIG_RETPOLINE) && defined(CONFIG_X86)
+#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_X86)
static struct static_key_false nf_tables_skip_direct_calls;
@@ -207,7 +207,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
struct nft_regs *regs,
struct nft_pktinfo *pkt)
{
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
unsigned long e;
if (nf_skip_indirect_calls())
@@ -236,7 +236,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_objref_map_eval);
#undef X
indirect_call:
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
expr->ops->eval(expr, regs, pkt);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c9fbe0f707..4abf660c7b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -427,6 +427,9 @@ replay_abort:
nfnl_unlock(subsys_id);
+ if (nlh->nlmsg_flags & NLM_F_ACK)
+ nfnl_err_add(&err_list, nlh, 0, &extack);
+
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -573,6 +576,8 @@ done:
} else if (err) {
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+ } else if (nlh->nlmsg_flags & NLM_F_ACK) {
+ nfnl_err_add(&err_list, nlh, 0, &extack);
}
} else {
enum nfnl_abort_action abort_action;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5cf38fc0a3..55e28e1da6 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head)
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
rcu);
+ rcu_read_lock();
nfqnl_flush(inst, NULL, 0);
+ rcu_read_unlock();
kfree(inst);
module_put(THIS_MODULE);
}
@@ -225,6 +227,148 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
return entry;
}
+static unsigned int nf_iterate(struct sk_buff *skb,
+ struct nf_hook_state *state,
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
+{
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
+
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
+repeat:
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
+ if (verdict != NF_ACCEPT) {
+ *index = i;
+ if (verdict != NF_REPEAT)
+ return verdict;
+ goto repeat;
+ }
+ i++;
+ }
+
+ *index = i;
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+ switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+ const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->tos == rt_info->tos &&
+ skb->mark == rt_info->mark &&
+ iph->daddr == rt_info->daddr &&
+ iph->saddr == rt_info->saddr))
+ return ip_route_me_harder(entry->state.net, entry->state.sk,
+ skb, RTN_UNSPEC);
+ }
+#endif
+ return 0;
+}
+
+static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}
+
+/* caller must hold rcu read-side lock */
+static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+{
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
+ struct sk_buff *skb = entry->skb;
+ const struct net *net;
+ unsigned int i;
+ int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
+
+ i = entry->hook_index;
+ if (!hooks || i >= hooks->num_hook_entries) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
+ nf_queue_entry_free(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
+ /* Continue traversal iff userspace said ok... */
+ if (verdict == NF_REPEAT)
+ verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
+
+ if (verdict == NF_ACCEPT) {
+ if (nf_reroute(skb, entry) < 0)
+ verdict = NF_DROP;
+ }
+
+ if (verdict == NF_ACCEPT) {
+next_hook:
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
+ }
+
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_STOP:
+ local_bh_disable();
+ entry->state.okfn(entry->state.net, entry->state.sk, skb);
+ local_bh_enable();
+ break;
+ case NF_QUEUE:
+ err = nf_queue(skb, &entry->state, i, verdict);
+ if (err == 1)
+ goto next_hook;
+ break;
+ case NF_STOLEN:
+ break;
+ default:
+ kfree_skb(skb);
+ }
+
+ nf_queue_entry_free(entry);
+}
+
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d170758a1e..7010541fcc 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -325,9 +325,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_hook *hook, *found = NULL;
int n = 0;
- if (event != NETDEV_UNREGISTER)
- return;
-
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev == dev)
found = hook;
@@ -367,8 +364,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
.net = dev_net(dev),
};
- if (event != NETDEV_UNREGISTER &&
- event != NETDEV_CHANGENAME)
+ if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
nft_net = nft_pernet(ctx.net);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index de9d1980df..92b984fa81 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -210,12 +210,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx,
nft_connlimit_do_destroy(ctx, priv);
}
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index dccc68a513..291ed20263 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -226,7 +226,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
@@ -236,7 +236,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 255640013a..452ed94c3a 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -754,7 +754,7 @@ static bool nft_ct_set_reduce(struct nft_regs_track *track,
return false;
}
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
static const struct nft_expr_ops nft_ct_get_fast_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
@@ -799,7 +799,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EINVAL);
if (tb[NFTA_CT_DREG]) {
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
u32 k = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (k) {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index c09dba5735..b4ada3ab21 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0)
return -1;
elem_expr->size += priv->expr_array[i]->ops->size;
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 37cfe6dd71..b58f62195f 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
- hooks = (1 << NF_INET_PRE_ROUTING);
- if (priv->flags & NFTA_FIB_F_IIF) {
- hooks |= (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_FORWARD);
- }
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 8e6d7eaf9d..de1b6066bf 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -102,12 +102,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
kfree(priv->last);
}
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index cefa25e0db..21d26b79b4 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx,
}
static int nft_limit_clone(struct nft_limit_priv *priv_dst,
- const struct nft_limit_priv *priv_src)
+ const struct nft_limit_priv *priv_src, gfp_t gfp)
{
priv_dst->tokens_max = priv_src->tokens_max;
priv_dst->rate = priv_src->rate;
@@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp);
if (!priv_dst->limit)
return -ENOMEM;
@@ -223,14 +223,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, &priv->limit);
}
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp);
}
static struct nft_expr_type nft_limit_type;
@@ -281,12 +282,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, priv);
}
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src);
+ return nft_limit_clone(priv_dst, priv_src, gfp);
}
static const struct nft_expr_ops nft_limit_bytes_ops = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 87c18eddb0..f3080fa1b2 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -24,7 +24,7 @@ struct nft_lookup {
struct nft_set_binding binding;
};
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
@@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
- &priv->dreg, NULL, set->dtype,
+ &priv->dreg, NULL,
+ nft_set_datatype(set),
set->dlen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index ba0d3683a4..9139ce38ea 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx,
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
+ if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG])
+ return -EINVAL;
+
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_PROTOCOL:
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 7f61506e5b..7fec57ff73 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -63,7 +63,6 @@ static int nft_osf_init(const struct nft_ctx *ctx,
{
struct nft_osf *priv = nft_expr_priv(expr);
u32 flags;
- int err;
u8 ttl;
if (!tb[NFTA_OSF_DREG])
@@ -83,13 +82,9 @@ static int nft_osf_init(const struct nft_ctx *ctx,
priv->flags = flags;
}
- err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
- NULL, NFT_DATA_VALUE,
- NFT_OSF_MAXGENRELEN);
- if (err < 0)
- return err;
-
- return 0;
+ return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
}
static int nft_osf_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 0a689c8e02..50429cbd42 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
int mac_off = skb_mac_header(skb) - skb->data;
u8 *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
- u8 vlan_hlen = 0;
-
- if ((skb->protocol == htons(ETH_P_8021AD) ||
- skb->protocol == htons(ETH_P_8021Q)) &&
- offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
- vlan_hlen += VLAN_HLEN;
vlanh = (u8 *) &veth;
- if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ if (offset < VLAN_ETH_HLEN) {
u8 ethlen = len;
- if (vlan_hlen &&
- skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
- return false;
- else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+ if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
return false;
- if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+ if (offset + len > VLAN_ETH_HLEN)
+ ethlen -= offset + len - VLAN_ETH_HLEN;
- memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+ memcpy(dst_u8, vlanh + offset, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
- offset = ETH_HLEN + vlan_hlen;
+ offset = ETH_HLEN;
} else {
- offset -= VLAN_HLEN + vlan_hlen;
+ offset -= VLAN_HLEN;
}
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
@@ -154,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}
-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+static bool nft_payload_need_vlan_adjust(u32 offset, u32 len)
{
- unsigned int len = priv->offset + priv->len;
+ unsigned int boundary = offset + len;
/* data past ether src/dst requested, copy needed */
- if (len > offsetof(struct ethhdr, h_proto))
+ if (boundary > offsetof(struct ethhdr, h_proto))
return true;
return false;
@@ -183,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
if (skb_vlan_tag_present(skb) &&
- nft_payload_need_vlan_copy(priv)) {
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
@@ -659,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx,
struct nft_payload *priv = nft_expr_priv(expr);
u32 base;
+ if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] ||
+ !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG])
+ return -EINVAL;
+
base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
switch (base) {
case NFT_PAYLOAD_TUN_HEADER:
@@ -810,21 +805,79 @@ struct nft_payload_set {
u8 csum_flags;
};
+/* This is not struct vlan_hdr. */
+struct nft_payload_vlan_hdr {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+};
+
+static bool
+nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
+ int *vlan_hlen)
+{
+ struct nft_payload_vlan_hdr *vlanh;
+ __be16 vlan_proto;
+ u16 vlan_tci;
+
+ if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) {
+ *vlan_hlen = VLAN_HLEN;
+ return true;
+ }
+
+ switch (offset) {
+ case offsetof(struct vlan_ethhdr, h_vlan_proto):
+ if (len == 2) {
+ vlan_proto = nft_reg_load_be16(src);
+ skb->vlan_proto = vlan_proto;
+ } else if (len == 4) {
+ vlanh = (struct nft_payload_vlan_hdr *)src;
+ __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto,
+ ntohs(vlanh->h_vlan_TCI));
+ } else {
+ return false;
+ }
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (len != 2)
+ return false;
+
+ vlan_tci = ntohs(nft_reg_load_be16(src));
+ skb->vlan_tci = vlan_tci;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
- struct sk_buff *skb = pkt->skb;
const u32 *src = &regs->data[priv->sreg];
- int offset, csum_offset;
+ int offset, csum_offset, vlan_hlen = 0;
+ struct sk_buff *skb = pkt->skb;
__wsum fsum, tsum;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
- offset = skb_mac_header(skb) - skb->data;
+
+ if (skb_vlan_tag_present(skb) &&
+ nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
+ if (!nft_payload_set_vlan(src, skb,
+ priv->offset, priv->len,
+ &vlan_hlen))
+ goto err;
+
+ if (!vlan_hlen)
+ return;
+ }
+
+ offset = skb_mac_header(skb) - skb->data - vlan_hlen;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
offset = skb_network_offset(skb);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 3ba12a7471..9b2d7463d3 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -233,7 +233,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
return nft_quota_do_destroy(ctx, priv);
}
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
struct nft_quota *priv_src = nft_expr_priv(src);
@@ -241,7 +241,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
priv_dst->quota = priv_src->quota;
priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp);
if (!priv_dst->consumed)
return -ENOMEM;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 24d9771385..14d88394bc 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ *dest = (__force u32)rt_nexthop(dst_rtable(dst),
ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
goto err;
- memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ memcpy(dest, rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index b42a34087e..eb4c4a4ac7 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -359,11 +359,13 @@
*
* Return: -1 on no match, bit position on 'match_only', 0 otherwise.
*/
-int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
+int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
+ unsigned long *dst,
const union nft_pipapo_map_bucket *mt, bool match_only)
{
unsigned long bitset;
- int k, ret = -1;
+ unsigned int k;
+ int ret = -1;
for (k = 0; k < len; k++) {
bitset = map[k];
@@ -432,7 +434,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
res_map = scratch->map + (map_index ? m->bsize_max : 0);
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -502,9 +504,11 @@ out:
* pipapo_get() - Get matching element reference given key data
* @net: Network namespace
* @set: nftables API set representation
+ * @m: storage containing active/existing elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
+ * @gfp: the type of memory to allocate (see kmalloc).
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -514,31 +518,31 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
+ const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
- u64 tstamp)
+ u64 tstamp, gfp_t gfp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
int i;
- m = priv->clone;
+ if (m->bsize_max == 0)
+ return ret;
- res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp);
if (!res_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
}
- fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp);
if (!fill_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
}
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -607,10 +611,13 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net), get_jiffies_64());
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64(),
+ GFP_ATOMIC);
if (IS_ERR(e))
return ERR_CAST(e);
@@ -618,6 +625,65 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
}
/**
+ * pipapo_realloc_mt() - Reallocate mapping table if needed upon resize
+ * @f: Field containing mapping table
+ * @old_rules: Amount of existing mapped rules
+ * @rules: Amount of new rules to map
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int pipapo_realloc_mt(struct nft_pipapo_field *f,
+ unsigned int old_rules, unsigned int rules)
+{
+ union nft_pipapo_map_bucket *new_mt = NULL, *old_mt = f->mt;
+ const unsigned int extra = PAGE_SIZE / sizeof(*new_mt);
+ unsigned int rules_alloc = rules;
+
+ might_sleep();
+
+ if (unlikely(rules == 0))
+ goto out_free;
+
+ /* growing and enough space left, no action needed */
+ if (rules > old_rules && f->rules_alloc > rules)
+ return 0;
+
+ /* downsize and extra slack has not grown too large */
+ if (rules < old_rules) {
+ unsigned int remove = f->rules_alloc - rules;
+
+ if (remove < (2u * extra))
+ return 0;
+ }
+
+ /* If set needs more than one page of memory for rules then
+ * allocate another extra page to avoid frequent reallocation.
+ */
+ if (rules > extra &&
+ check_add_overflow(rules, extra, &rules_alloc))
+ return -EOVERFLOW;
+
+ new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL);
+ if (!new_mt)
+ return -ENOMEM;
+
+ if (old_mt)
+ memcpy(new_mt, old_mt, min(old_rules, rules) * sizeof(*new_mt));
+
+ if (rules > old_rules) {
+ memset(new_mt + old_rules, 0,
+ (rules - old_rules) * sizeof(*new_mt));
+ }
+out_free:
+ f->rules_alloc = rules_alloc;
+ f->mt = new_mt;
+
+ kvfree(old_mt);
+
+ return 0;
+}
+
+/**
* pipapo_resize() - Resize lookup or mapping table, or both
* @f: Field containing lookup and mapping tables
* @old_rules: Previous amount of rules in field
@@ -629,12 +695,15 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
*
* Return: 0 on success, -ENOMEM on allocation failure.
*/
-static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
+static int pipapo_resize(struct nft_pipapo_field *f,
+ unsigned int old_rules, unsigned int rules)
{
long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
- union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt;
- size_t new_bucket_size, copy;
- int group, bucket;
+ unsigned int new_bucket_size, copy;
+ int group, bucket, err;
+
+ if (rules >= NFT_PIPAPO_RULE0_MAX)
+ return -ENOSPC;
new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG);
#ifdef NFT_PIPAPO_ALIGN
@@ -674,27 +743,18 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
}
mt:
- new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
- if (!new_mt) {
+ err = pipapo_realloc_mt(f, old_rules, rules);
+ if (err) {
kvfree(new_lt);
- return -ENOMEM;
- }
-
- memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt));
- if (rules > old_rules) {
- memset(new_mt + old_rules, 0,
- (rules - old_rules) * sizeof(*new_mt));
+ return err;
}
if (new_lt) {
f->bsize = new_bucket_size;
- NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+ f->lt = new_lt;
kvfree(old_lt);
}
- f->mt = new_mt;
- kvfree(old_mt);
-
return 0;
}
@@ -845,8 +905,8 @@ static void pipapo_lt_8b_to_4b(int old_groups, int bsize,
*/
static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
{
+ unsigned int groups, bb;
unsigned long *new_lt;
- int groups, bb;
size_t lt_size;
lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
@@ -896,7 +956,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
f->groups = groups;
f->bb = bb;
kvfree(f->lt);
- NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+ f->lt = new_lt;
}
/**
@@ -913,7 +973,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules, group, ret, bit_offset = 0;
+ unsigned int rule = f->rules, group, ret, bit_offset = 0;
ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
@@ -1188,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return 0;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old);
+
+/**
+ * pipapo_maybe_clone() - Build clone for pending data changes, if not existing
+ * @set: nftables API set representation
+ *
+ * Return: newly created or existing clone, if any. NULL on allocation failure
+ */
+static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+
+ if (priv->clone)
+ return priv->clone;
+
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = pipapo_clone(m);
+
+ return priv->clone;
+}
+
/**
* nft_pipapo_insert() - Validate and insert ranged elements
* @net: Network namespace
@@ -1204,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
u64 tstamp = nft_net_tstamp(net);
@@ -1213,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
+ if (!m)
+ return -ENOMEM;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
end = (const u8 *)nft_set_ext_key_end(ext)->data;
else
end = start;
- dup = pipapo_get(net, set, start, genmask, tstamp);
+ dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1240,7 +1336,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
+ dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
+ GFP_KERNEL);
}
if (PTR_ERR(dup) != -ENOENT) {
@@ -1253,8 +1350,14 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
/* Validate */
start_p = start;
end_p = end;
+
+ /* some helpers return -1, or 0 >= for valid rule pos,
+ * so we cannot support more than INT_MAX rules at this time.
+ */
+ BUILD_BUG_ON(NFT_PIPAPO_RULE0_MAX > INT_MAX);
+
nft_pipapo_for_each_field(f, i, m) {
- if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
+ if (f->rules >= NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
if (memcmp(start_p, end_p,
@@ -1266,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Insert */
- priv->dirty = true;
-
bsize_max = m->bsize_max;
nft_pipapo_for_each_field(f, i, m) {
@@ -1318,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
* pipapo_clone() - Clone matching data to create new working copy
* @old: Existing matching data
*
- * Return: copy of matching data passed as 'old', error pointer on failure
+ * Return: copy of matching data passed as 'old' or NULL.
*/
static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
{
@@ -1328,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
- return ERR_PTR(-ENOMEM);
+ return NULL;
new->field_count = old->field_count;
new->bsize_max = old->bsize_max;
@@ -1360,18 +1461,25 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new_lt)
goto out_lt;
- NFT_PIPAPO_LT_ASSIGN(dst, new_lt);
+ dst->lt = new_lt;
memcpy(NFT_PIPAPO_LT_ALIGN(new_lt),
NFT_PIPAPO_LT_ALIGN(src->lt),
src->bsize * sizeof(*dst->lt) *
src->groups * NFT_PIPAPO_BUCKETS(src->bb));
- dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
- if (!dst->mt)
- goto out_mt;
+ if (src->rules > 0) {
+ dst->mt = kvmalloc_array(src->rules_alloc,
+ sizeof(*src->mt), GFP_KERNEL);
+ if (!dst->mt)
+ goto out_mt;
+
+ memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
+ } else {
+ dst->mt = NULL;
+ dst->rules_alloc = 0;
+ }
- memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
src++;
dst++;
}
@@ -1393,7 +1501,7 @@ out_scratch:
free_percpu(new->scratch);
kfree(new);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
/**
@@ -1425,10 +1533,10 @@ out_scratch:
*
* Return: Number of rules that originated from the same entry as @first.
*/
-static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
+static unsigned int pipapo_rules_same_key(struct nft_pipapo_field *f, unsigned int first)
{
struct nft_pipapo_elem *e = NULL; /* Keep gcc happy */
- int r;
+ unsigned int r;
for (r = first; r < f->rules; r++) {
if (r != first && e != f->mt[r].e)
@@ -1481,8 +1589,9 @@ static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
* 0 1 2
* element pointers: 0x42 0x42 0x44
*/
-static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules,
- int start, int n, int to_offset, bool is_last)
+static void pipapo_unmap(union nft_pipapo_map_bucket *mt, unsigned int rules,
+ unsigned int start, unsigned int n,
+ unsigned int to_offset, bool is_last)
{
int i;
@@ -1588,8 +1697,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ unsigned int rules_f0, first_rule = 0;
u64 tstamp = nft_net_tstamp(net);
- int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1600,7 +1709,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const struct nft_pipapo_field *f;
- int i, start, rules_fx;
+ unsigned int i, start, rules_fx;
start = first_rule;
rules_fx = rules_f0;
@@ -1624,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- priv->dirty = true;
-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return;
@@ -1703,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *old;
-
- if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ struct nft_pipapo_match *old;
- if (!priv->dirty)
+ if (!priv->clone)
return;
- new_clone = pipapo_clone(priv->clone);
- if (IS_ERR(new_clone))
- return;
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ pipapo_gc(set, priv->clone);
- priv->dirty = false;
+ old = rcu_replace_pointer(priv->match, priv->clone,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = NULL;
- old = rcu_access_pointer(priv->match);
- rcu_assign_pointer(priv->match, priv->clone);
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
-
- priv->clone = new_clone;
-}
-
-static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
-{
-#ifdef CONFIG_PROVE_LOCKING
- const struct net *net = read_pnet(&set->net);
-
- return lockdep_is_held(&nft_pernet(net)->commit_mutex);
-#else
- return true;
-#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *m;
- if (!priv->dirty)
+ if (!priv->clone)
return;
-
- m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
-
- new_clone = pipapo_clone(m);
- if (IS_ERR(new_clone))
- return;
-
- priv->dirty = false;
-
pipapo_free_match(priv->clone);
- priv->clone = new_clone;
+ priv->clone = NULL;
}
/**
@@ -1777,51 +1857,38 @@ static void nft_pipapo_activate(const struct net *net,
}
/**
- * pipapo_deactivate() - Check that element is in set, mark as inactive
+ * nft_pipapo_deactivate() - Search for element and make it inactive
* @net: Network namespace
* @set: nftables API set representation
- * @data: Input key data
- * @ext: nftables API extension pointer, used to check for end element
- *
- * This is a convenience function that can be called from both
- * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same
- * operation.
+ * @elem: nftables API element representation containing key data
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const u8 *data, const struct nft_set_ext *ext)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
+ /* removal must occur on priv->clone, if we are low on memory
+ * we have no choice and must fail the removal request.
+ */
+ if (!m)
+ return NULL;
+
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
- return e;
-}
-
-/**
- * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive
- * @net: Network namespace
- * @set: nftables API set representation
- * @elem: nftables API element representation containing key data
- *
- * Return: deactivated element if found, NULL otherwise.
- */
-static struct nft_elem_priv *
-nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-
- return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
+ return &e->priv;
}
/**
- * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
+ * nft_pipapo_flush() - make element inactive
* @net: Network namespace
* @set: nftables API set representation
* @elem_priv: nftables API element representation containing key data
@@ -1978,7 +2045,7 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
- int rules_f0, first_rule = 0;
+ unsigned int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
const u8 *data;
@@ -2018,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
if (last && f->mt[rulemap[i].to].e == e) {
- priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
@@ -2031,34 +2097,22 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
}
/**
- * nft_pipapo_walk() - Walk over elements
+ * nft_pipapo_do_walk() - Walk over elements in m
* @ctx: nftables API context
* @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
* @iter: Iterator
*
* As elements are referenced in the mapping array for the last field, directly
* scan that array: there's no need to follow rule mappings from the first
- * field.
+ * field. @m is protected either by RCU read lock or by transaction mutex.
*/
-static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_pipapo_match *m,
+ struct nft_set_iter *iter)
{
- struct nft_pipapo *priv = nft_set_priv(set);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
- int i, r;
-
- WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
- iter->type != NFT_ITER_UPDATE);
-
- rcu_read_lock();
- if (iter->type == NFT_ITER_READ)
- m = rcu_dereference(priv->match);
- else
- m = priv->clone;
-
- if (unlikely(!m))
- goto out;
+ unsigned int i, r;
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2076,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
- goto out;
+ return;
cont:
iter->count++;
}
+}
-out:
- rcu_read_unlock();
+/**
+ * nft_pipapo_walk() - Walk over elements
+ * @ctx: nftables API context
+ * @set: nftables API set representation
+ * @iter: Iterator
+ *
+ * Test if destructive action is needed or not, clone active backend if needed
+ * and call the real function to work on the data.
+ */
+static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ const struct nft_pipapo_match *m;
+
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ m = pipapo_maybe_clone(set);
+ if (!m) {
+ iter->err = -ENOMEM;
+ return;
+ }
+
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_READ:
+ rcu_read_lock();
+ m = rcu_dereference(priv->match);
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ rcu_read_unlock();
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
}
/**
@@ -2150,6 +2239,9 @@ static int nft_pipapo_init(const struct nft_set *set,
field_count = desc->field_count ? : 1;
+ BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS > 255);
+ BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS != NFT_REG32_COUNT);
+
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
@@ -2171,7 +2263,11 @@ static int nft_pipapo_init(const struct nft_set *set,
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
- int len = desc->field_len[i] ? : set->klen;
+ unsigned int len = desc->field_len[i] ? : set->klen;
+
+ /* f->groups is u8 */
+ BUILD_BUG_ON((NFT_PIPAPO_MAX_BYTES *
+ BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS_LARGE_SET) >= 256);
f->bb = NFT_PIPAPO_GROUP_BITS_INIT;
f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f);
@@ -2180,25 +2276,15 @@ static int nft_pipapo_init(const struct nft_set *set,
f->bsize = 0;
f->rules = 0;
- NFT_PIPAPO_LT_ASSIGN(f, NULL);
+ f->rules_alloc = 0;
+ f->lt = NULL;
f->mt = NULL;
}
- /* Create an initial clone of matching data for next insertion */
- priv->clone = pipapo_clone(m);
- if (IS_ERR(priv->clone)) {
- err = PTR_ERR(priv->clone);
- goto out_free;
- }
-
- priv->dirty = false;
-
rcu_assign_pointer(priv->match, m);
return 0;
-out_free:
- free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2216,7 +2302,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
- int i, r;
+ unsigned int i, r;
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2243,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- int cpu;
m = rcu_dereference_protected(priv->match, true);
- if (m) {
- rcu_barrier();
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(m, cpu);
- free_percpu(m->scratch);
- pipapo_free_fields(m);
- kfree(m);
- priv->match = NULL;
- }
if (priv->clone) {
- m = priv->clone;
-
- nft_set_pipapo_match_destroy(ctx, set, m);
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(priv->clone, cpu);
- free_percpu(priv->clone->scratch);
-
- pipapo_free_fields(priv->clone);
- kfree(priv->clone);
+ nft_set_pipapo_match_destroy(ctx, set, priv->clone);
+ pipapo_free_match(priv->clone);
priv->clone = NULL;
+ } else {
+ nft_set_pipapo_match_destroy(ctx, set, m);
}
+
+ pipapo_free_match(m);
}
/**
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 42464e7c24..4a2ff85ce1 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -70,15 +70,9 @@
#define NFT_PIPAPO_ALIGN_HEADROOM \
(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
-#define NFT_PIPAPO_LT_ASSIGN(field, x) \
- do { \
- (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
- (field)->lt = (x); \
- } while (0)
#else
#define NFT_PIPAPO_ALIGN_HEADROOM 0
#define NFT_PIPAPO_LT_ALIGN(lt) (lt)
-#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
#endif /* NFT_PIPAPO_ALIGN */
#define nft_pipapo_for_each_field(field, index, match) \
@@ -110,22 +104,20 @@ union nft_pipapo_map_bucket {
/**
* struct nft_pipapo_field - Lookup, mapping tables and related data for a field
- * @groups: Amount of bit groups
* @rules: Number of inserted rules
* @bsize: Size of each bucket in lookup table, in longs
+ * @rules_alloc: Number of allocated rules, always >= rules
+ * @groups: Amount of bit groups
* @bb: Number of bits grouped together in lookup table buckets
* @lt: Lookup table: 'groups' rows of buckets
- * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
* @mt: Mapping table: one bucket per rule
*/
struct nft_pipapo_field {
- int groups;
- unsigned long rules;
- size_t bsize;
- int bb;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long *lt_aligned;
-#endif
+ unsigned int rules;
+ unsigned int bsize;
+ unsigned int rules_alloc;
+ u8 groups;
+ u8 bb;
unsigned long *lt;
union nft_pipapo_map_bucket *mt;
};
@@ -145,15 +137,15 @@ struct nft_pipapo_scratch {
/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count: Amount of fields in set
- * @scratch: Preallocated per-CPU maps for partial matching results
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
+ * @scratch: Preallocated per-CPU maps for partial matching results
* @rcu: Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
- int field_count;
+ u8 field_count;
+ unsigned int bsize_max;
struct nft_pipapo_scratch * __percpu *scratch;
- size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
};
@@ -163,14 +155,12 @@ struct nft_pipapo_match {
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
- * @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
- bool dirty;
unsigned long last_gc;
};
@@ -186,7 +176,8 @@ struct nft_pipapo_elem {
struct nft_set_ext ext;
};
-int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
+int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
+ unsigned long *dst,
const union nft_pipapo_map_bucket *mt, bool match_only);
/**
@@ -287,4 +278,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
return size;
}
+/**
+ * pipapo_resmap_init() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Initialize all bits covered by the first field to one, so that after
+ * the first step, only the matching bits of the first bit group remain.
+ *
+ * If other fields have a large bitmap, set remainder of res_map to 0.
+ */
+static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ for (i = 0; i < f->bsize; i++)
+ res_map[i] = ULONG_MAX;
+
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
#endif /* _NFT_SET_PIPAPO_H */
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index d08407d589..b8d3c3213e 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1036,6 +1036,7 @@ nothing:
/**
* nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
+ * @mdata: Matching data, including mapping table
* @map: Previous match result, used as initial bitmap
* @fill: Destination bitmap to be filled with current match result
* @f: Field, containing lookup and mapping tables
@@ -1051,7 +1052,8 @@ nothing:
* Return: -1 on no match, rule index of match if @last, otherwise first long
* word index to be checked next (i.e. first filled word).
*/
-static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
+static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata,
+ unsigned long *map, unsigned long *fill,
const struct nft_pipapo_field *f,
int offset, const u8 *pkt,
bool first, bool last)
@@ -1060,7 +1062,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
int i, ret = -1, b;
if (first)
- memset(map, 0xff, bsize * sizeof(*map));
+ pipapo_resmap_init(mdata, map);
for (i = offset; i < bsize; i++) {
if (f->bb == 8)
@@ -1137,8 +1139,14 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
bool map_index;
int i, ret = 0;
- if (unlikely(!irq_fpu_usable()))
- return nft_pipapo_lookup(net, set, key, ext);
+ local_bh_disable();
+
+ if (unlikely(!irq_fpu_usable())) {
+ bool fallback_res = nft_pipapo_lookup(net, set, key, ext);
+
+ local_bh_enable();
+ return fallback_res;
+ }
m = rcu_dereference(priv->match);
@@ -1153,6 +1161,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
+ local_bh_enable();
return false;
}
@@ -1186,7 +1195,7 @@ next_match:
} else if (f->groups == 16) {
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1202,7 +1211,7 @@ next_match:
} else if (f->groups == 32) {
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1233,6 +1242,7 @@ out:
if (i % 2)
scratch->map_index = !map_index;
kernel_fpu_end();
+ local_bh_enable();
return ret >= 0;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index f735d79d8b..60a76e6e34 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -174,8 +174,8 @@ struct nft_tunnel_opts {
struct erspan_metadata erspan;
u8 data[IP_TUNNEL_OPTS_MAX];
} u;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
u32 len;
- __be16 flags;
};
struct nft_tunnel_obj {
@@ -271,7 +271,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
opts->len = sizeof(struct vxlan_metadata);
- opts->flags = TUNNEL_VXLAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags);
return 0;
}
@@ -325,7 +326,8 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
opts->u.erspan.version = version;
opts->len = sizeof(struct erspan_metadata);
- opts->flags = TUNNEL_ERSPAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags);
return 0;
}
@@ -366,7 +368,8 @@ static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
opt->length = data_len / 4;
opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
- opts->flags = TUNNEL_GENEVE_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags);
return 0;
}
@@ -385,8 +388,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
struct nft_tunnel_opts *opts)
{
struct nlattr *nla;
- __be16 type = 0;
int err, rem;
+ u32 type = 0;
err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
nft_tunnel_opts_policy, NULL);
@@ -401,7 +404,7 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_vxlan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
if (type)
@@ -409,15 +412,15 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_erspan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
err = nft_tunnel_obj_geneve_init(nla, opts);
if (err)
return err;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
default:
return -EOPNOTSUPP;
@@ -454,7 +457,9 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
memset(&info, 0, sizeof(info));
info.mode = IP_TUNNEL_INFO_TX;
info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
- info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+ __set_bit(IP_TUNNEL_KEY_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_NOCACHE_BIT, info.key.tun_flags);
if (tb[NFTA_TUNNEL_KEY_IP]) {
err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
@@ -483,11 +488,12 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
- info.key.tun_flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
- info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT,
+ info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
- info.key.tun_flags |= TUNNEL_SEQ;
+ __set_bit(IP_TUNNEL_SEQ_BIT, info.key.tun_flags);
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
@@ -583,7 +589,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
if (!nest)
return -1;
- if (opts->flags & TUNNEL_VXLAN_OPT) {
+ if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
if (!inner)
goto failure;
@@ -591,7 +597,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
htonl(opts->u.vxlan.gbp)))
goto inner_failure;
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
if (!inner)
goto failure;
@@ -613,7 +619,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
break;
}
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_GENEVE_OPT) {
+ } else if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -658,11 +664,11 @@ static int nft_tunnel_flags_dump(struct sk_buff *skb,
{
u32 flags = 0;
- if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
- if (!(info->key.tun_flags & TUNNEL_CSUM))
+ if (!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
- if (info->key.tun_flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_SEQ_NUMBER;
if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index acef4155f0..008419db81 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -179,43 +179,6 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
}
EXPORT_SYMBOL_GPL(nf_route);
-static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
-{
-#ifdef CONFIG_INET
- const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- if (!(iph->tos == rt_info->tos &&
- skb->mark == rt_info->mark &&
- iph->daddr == rt_info->daddr &&
- iph->saddr == rt_info->saddr))
- return ip_route_me_harder(entry->state.net, entry->state.sk,
- skb, RTN_UNSPEC);
- }
-#endif
- return 0;
-}
-
-int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
-{
- const struct nf_ipv6_ops *v6ops;
- int ret = 0;
-
- switch (entry->state.pf) {
- case AF_INET:
- ret = nf_ip_reroute(skb, entry);
- break;
- case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- ret = v6ops->reroute(skb, entry);
- break;
- }
- return ret;
-}
-
/* Only get and check the lengths, not do any hop-by-hop stuff. */
int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen)
{
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 21624d6831..da5d929c7c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1142,7 +1142,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
if (target->compat_from_user)
target->compat_from_user(t->data, ct->data);
else
- memcpy(t->data, ct->data, tsize - sizeof(*ct));
+ unsafe_memcpy(t->data, ct->data, tsize - sizeof(*ct),
+ /* UAPI 0-sized destination */);
tsize += off;
t->u.user.target_size = tsize;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 27511c90a2..cd9160bbc9 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -610,7 +610,7 @@ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *iter;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -666,8 +666,8 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *prev = NULL;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmask;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmask;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -857,7 +857,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ iter->bitmap[idx] |= (u64)bitmap
<< (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
@@ -876,7 +876,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
* Description:
* Starting at @offset, walk the bitmap from left to right until either the
* desired bit is found or we reach the end. Return the bit offset, -1 if
- * not found, or -2 if error.
+ * not found.
*/
int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
u32 offset, u8 state)
@@ -965,6 +965,7 @@ int netlbl_enabled(void)
* @sk: the socket to label
* @family: protocol family
* @secattr: the security attributes
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Attach the correct label to the given socket using the security attributes
@@ -977,7 +978,8 @@ int netlbl_enabled(void)
*/
int netlbl_sock_setattr(struct sock *sk,
u16 family,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val;
struct netlbl_dom_map *dom_entry;
@@ -997,7 +999,7 @@ int netlbl_sock_setattr(struct sock *sk,
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
dom_entry->def.cipso,
- secattr);
+ secattr, sk_locked);
break;
case NETLBL_NLTYPE_UNLABELED:
ret_val = 0;
@@ -1091,6 +1093,28 @@ int netlbl_sock_getattr(struct sock *sk,
}
/**
+ * netlbl_sk_lock_check - Check if the socket lock has been acquired.
+ * @sk: the socket to be checked
+ *
+ * Return: true if socket @sk is locked or if lock debugging is disabled at
+ * runtime or compile-time; false otherwise
+ *
+ */
+#ifdef CONFIG_LOCKDEP
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ if (debug_locks)
+ return lockdep_sock_is_held(sk);
+ return true;
+}
+#else
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ return true;
+}
+#endif
+
+/**
* netlbl_conn_setattr - Label a connected socket using the correct protocol
* @sk: the socket to label
* @addr: the destination address
@@ -1126,7 +1150,8 @@ int netlbl_conn_setattr(struct sock *sk,
switch (entry->type) {
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
- entry->cipso, secattr);
+ entry->cipso, secattr,
+ netlbl_sk_lock_check(sk));
break;
case NETLBL_NLTYPE_UNLABELED:
/* just delete the protocols we support for right now
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ff31535126..fa9c090cf6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -59,7 +59,6 @@
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
-#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>
@@ -73,6 +72,7 @@
#include <trace/events/netlink.h>
#include "af_netlink.h"
+#include "genetlink.h"
struct listeners {
struct rcu_head rcu;
@@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
"nlk_cb_mutex-MAX_LINKS"
};
-static int netlink_dump(struct sock *sk);
+static int netlink_dump(struct sock *sk, bool lock_taken);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -636,7 +636,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol,
+ struct mutex *dump_cb_mutex, int protocol,
int kern)
{
struct sock *sk;
@@ -651,15 +651,11 @@ static int __netlink_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- if (cb_mutex) {
- nlk->cb_mutex = cb_mutex;
- } else {
- nlk->cb_mutex = &nlk->cb_def_mutex;
- mutex_init(nlk->cb_mutex);
- lockdep_set_class_and_name(nlk->cb_mutex,
+ mutex_init(&nlk->nl_cb_mutex);
+ lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- }
+ nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -1206,23 +1202,21 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
+ size_t head_size = SKB_HEAD_ALIGN(size);
struct sk_buff *skb;
void *data;
- if (size <= NLMSG_GOODSIZE || broadcast)
+ if (head_size <= PAGE_SIZE || broadcast)
return alloc_skb(size, GFP_KERNEL);
- size = SKB_DATA_ALIGN(size) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
- data = vmalloc(size);
- if (data == NULL)
+ data = kvmalloc(head_size, GFP_KERNEL);
+ if (!data)
return NULL;
- skb = __build_skb(data, size);
- if (skb == NULL)
- vfree(data);
- else
+ skb = __build_skb(data, head_size);
+ if (!skb)
+ kvfree(data);
+ else if (is_vmalloc_addr(data))
skb->destructor = netlink_skb_destructor;
return skb;
@@ -1779,6 +1773,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
netlink_unlock_table();
return err;
}
+ case NETLINK_LISTEN_ALL_NSID:
+ flag = NETLINK_F_LISTEN_ALL_NSID;
+ break;
case NETLINK_CAP_ACK:
flag = NETLINK_F_CAP_ACK;
break;
@@ -1987,7 +1984,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, false);
if (ret) {
WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
@@ -2168,6 +2165,69 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
}
EXPORT_SYMBOL(__nlmsg_put);
+static size_t
+netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
+ const struct netlink_ext_ack *extack)
+{
+ size_t tlvlen;
+
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
+ return 0;
+
+ tlvlen = 0;
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+
+ /* Following attributes are only reported as error (not warning) */
+ if (!err)
+ return tlvlen;
+
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->policy)
+ tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ if (extack->miss_type)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->miss_nest)
+ tlvlen += nla_total_size(sizeof(u32));
+
+ return tlvlen;
+}
+
+static void
+netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
+
+ if (!err)
+ return;
+
+ if (extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr - (const u8 *)nlh));
+ if (extack->policy)
+ netlink_policy_dump_write_attr(skb, extack->policy,
+ NLMSGERR_ATTR_POLICY);
+ if (extack->miss_type)
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
+ extack->miss_type));
+ if (extack->miss_nest &&
+ !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
+ (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
+ (u8 *)extack->miss_nest - (const u8 *)nlh));
+}
+
/*
* It looks a bit ugly.
* It would be better to create kernel thread.
@@ -2178,6 +2238,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
struct nlmsghdr *nlh;
+ size_t extack_len;
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
@@ -2187,16 +2248,20 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
+ extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
+ if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
- if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
+ if (skb_tailroom(skb) >= extack_len) {
+ netlink_ack_tlv_fill(cb->skb, skb, cb->nlh,
+ nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
+ }
}
return 0;
}
-static int netlink_dump(struct sock *sk)
+static int netlink_dump(struct sock *sk, bool lock_taken)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
@@ -2208,7 +2273,8 @@ static int netlink_dump(struct sock *sk)
int alloc_min_size;
int alloc_size;
- mutex_lock(nlk->cb_mutex);
+ if (!lock_taken)
+ mutex_lock(&nlk->nl_cb_mutex);
if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
@@ -2260,14 +2326,33 @@ static int netlink_dump(struct sock *sk)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
+ struct mutex *extra_mutex = nlk->dump_cb_mutex;
+
cb->extack = &extack;
+
+ if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
+ extra_mutex = NULL;
+ if (extra_mutex)
+ mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
+ if (extra_mutex)
+ mutex_unlock(extra_mutex);
+
+ /* EMSGSIZE plus something already in the skb means
+ * that there's more to dump but current skb has filled up.
+ * If the callback really wants to return EMSGSIZE to user space
+ * it needs to do so again, on the next cb->dump() call,
+ * without putting data in the skb.
+ */
+ if (nlk->dump_done_errno == -EMSGSIZE && skb->len)
+ nlk->dump_done_errno = skb->len;
+
cb->extack = NULL;
}
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2301,13 +2386,13 @@ static int netlink_dump(struct sock *sk)
WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
module_put(module);
consume_skb(skb);
return 0;
errout_skb:
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
kfree_skb(skb);
return err;
}
@@ -2330,7 +2415,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
- mutex_lock(nlk->cb_mutex);
+ mutex_lock(&nlk->nl_cb_mutex);
/* A dump is in progress... */
if (nlk->cb_running) {
ret = -EBUSY;
@@ -2350,6 +2435,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
+ cb->flags = control->flags;
cb->skb = skb;
cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
@@ -2365,9 +2451,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
- mutex_unlock(nlk->cb_mutex);
-
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, true);
sock_put(sk);
@@ -2383,76 +2467,13 @@ error_put:
module_put(control->module);
error_unlock:
sock_put(sk);
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
error_free:
kfree_skb(skb);
return ret;
}
EXPORT_SYMBOL(__netlink_dump_start);
-static size_t
-netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
- const struct netlink_ext_ack *extack)
-{
- size_t tlvlen;
-
- if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
- return 0;
-
- tlvlen = 0;
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
-
- /* Following attributes are only reported as error (not warning) */
- if (!err)
- return tlvlen;
-
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->policy)
- tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
- if (extack->miss_type)
- tlvlen += nla_total_size(sizeof(u32));
- if (extack->miss_nest)
- tlvlen += nla_total_size(sizeof(u32));
-
- return tlvlen;
-}
-
-static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- struct nlmsghdr *nlh, int err,
- const struct netlink_ext_ack *extack)
-{
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len, extack->cookie));
-
- if (!err)
- return;
-
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr - (u8 *)nlh));
- if (extack->policy)
- netlink_policy_dump_write_attr(skb, extack->policy,
- NLMSGERR_ATTR_POLICY);
- if (extack->miss_type)
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
- extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
- (u8 *)extack->miss_nest - (u8 *)nlh));
-}
-
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 2145979b99..9751e29d4b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -39,8 +39,9 @@ struct netlink_sock {
bool cb_running;
int dump_done_errno;
struct netlink_callback cb;
- struct mutex *cb_mutex;
- struct mutex cb_def_mutex;
+ struct mutex nl_cb_mutex;
+
+ struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 1eeff94228..61981e01fd 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -207,7 +207,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
- return err < 0 ? err : skb->len;
+ return err <= 0 ? err : skb->len;
}
static int netlink_diag_dump_done(struct netlink_callback *cb)
@@ -241,6 +241,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler netlink_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_NETLINK,
.dump = netlink_diag_handler_dump,
};
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8c7af02f84..feb54c63a1 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -22,6 +22,8 @@
#include <net/sock.h>
#include <net/genetlink.h>
+#include "genetlink.h"
+
static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
static DECLARE_RWSEM(cb_lock);
@@ -1232,7 +1234,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
if (hdr == NULL)
- return -1;
+ return -EMSGSIZE;
if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) ||
nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) ||
@@ -1355,6 +1357,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
int fams_to_skip = cb->args[0];
unsigned int id;
+ int err = 0;
idr_for_each_entry(&genl_fam_idr, rt, id) {
if (!rt->netnsok && !net_eq(net, &init_net))
@@ -1363,16 +1366,17 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
if (n++ < fams_to_skip)
continue;
- if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- skb, CTRL_CMD_NEWFAMILY) < 0) {
+ err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, CTRL_CMD_NEWFAMILY);
+ if (err) {
n--;
break;
}
}
cb->args[0] = n;
- return skb->len;
+ return err;
}
static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
@@ -1836,6 +1840,9 @@ static int genl_bind(struct net *net, int group)
!ns_capable(net->user_ns, CAP_SYS_ADMIN))
ret = -EPERM;
+ if (family->bind)
+ family->bind(i);
+
break;
}
@@ -1843,12 +1850,39 @@ static int genl_bind(struct net *net, int group)
return ret;
}
+static void genl_unbind(struct net *net, int group)
+{ /* Forward an unbind of multicast group @group to the family whose group range contains it; @net is not used here. */
+ const struct genl_family *family;
+ unsigned int id;
+
+ down_read(&cb_lock); /* the family IDR is walked under the read side of cb_lock */
+
+ idr_for_each_entry(&genl_fam_idr, family, id) {
+ int i;
+
+ if (family->n_mcgrps == 0) /* family has no multicast groups */
+ continue;
+
+ i = group - family->mcgrp_offset; /* family-relative group index */
+ if (i < 0 || i >= family->n_mcgrps) /* @group is outside this family's range */
+ continue;
+
+ if (family->unbind) /* the callback is optional */
+ family->unbind(i);
+
+ break; /* stop at the first family whose range matches */
+ }
+
+ up_read(&cb_lock);
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
+ .unbind = genl_unbind,
.release = genl_release,
};
diff --git a/net/netlink/genetlink.h b/net/netlink/genetlink.h
new file mode 100644
index 0000000000..89bd9d2631
--- /dev/null
+++ b/net/netlink/genetlink.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_GENETLINK_H
+#define __NET_GENETLINK_H
+
+#include <linux/wait.h>
+
+/* For synchronisation between af_netlink and genetlink; only declared here. */
+extern atomic_t genl_sk_destructing_cnt;
+extern wait_queue_head_t genl_sk_destructing_waitq;
+
+#endif /* __NET_GENETLINK_H */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 104a80b754..6ee148f0e6 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -772,8 +772,8 @@ out_release:
return err;
}
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int nr_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 70480869ad..bd2b17b219 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
return 0;
}
-static inline void __nr_remove_node(struct nr_node *nr_node)
+static void nr_remove_node_locked(struct nr_node *nr_node)
{
+ lockdep_assert_held(&nr_node_list_lock);
+
hlist_del_init(&nr_node->node_node);
nr_node_put(nr_node);
}
-#define nr_remove_node_locked(__node) \
- __nr_remove_node(__node)
-
-static void nr_remove_node(struct nr_node *nr_node)
-{
- spin_lock_bh(&nr_node_list_lock);
- __nr_remove_node(nr_node);
- spin_unlock_bh(&nr_node_list_lock);
-}
-
static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
{
hlist_del_init(&nr_neigh->neigh_node);
@@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
return -EINVAL;
}
+ spin_lock_bh(&nr_node_list_lock);
nr_node_lock(nr_node);
for (i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
@@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node->count--;
if (nr_node->count == 0) {
- nr_remove_node(nr_node);
+ nr_remove_node_locked(nr_node);
} else {
switch (i) {
case 0:
@@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node_put(nr_node);
}
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
return 0;
}
}
nr_neigh_put(nr_neigh);
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
nr_node_put(nr_node);
return -EINVAL;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 4e7c968cde..5e3ca068f0 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
+ if (sk->sk_state == TCP_LISTEN)
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 79fb2d3f47..7dc0fa628f 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -140,7 +140,6 @@ static struct ctl_table nr_table[] = {
.extra1 = &min_reset,
.extra2 = &max_reset
},
- { }
};
int __init nr_register_sysctl(void)
diff --git a/net/nfc/core.c b/net/nfc/core.c
index eb2c0959e5..e58dc64050 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,7 +1015,7 @@ static void nfc_check_pres_timeout(struct timer_list *t)
schedule_work(&dev->check_pres_work);
}
-struct class nfc_class = {
+const struct class nfc_class = {
.name = "nfc",
.dev_release = nfc_release,
};
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 2140f67246..ba91284f40 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -30,15 +30,19 @@ exit:
return r;
}
+static void nfc_llc_del_engine(struct nfc_llc_engine *llc_engine)
+{ /* Unlink @llc_engine from the list it is on, then free its name and the engine itself. */
+ list_del(&llc_engine->entry);
+ kfree_const(llc_engine->name); /* pairs with kstrdup_const() in nfc_llc_register() */
+ kfree(llc_engine);
+}
+
void nfc_llc_exit(void)
{
struct nfc_llc_engine *llc_engine, *n;
- list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) {
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
- }
+ list_for_each_entry_safe(llc_engine, n, &llc_engines, entry)
+ nfc_llc_del_engine(llc_engine);
}
int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
@@ -49,7 +53,7 @@ int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
if (llc_engine == NULL)
return -ENOMEM;
- llc_engine->name = kstrdup(name, GFP_KERNEL);
+ llc_engine->name = kstrdup_const(name, GFP_KERNEL);
if (llc_engine->name == NULL) {
kfree(llc_engine);
return -ENOMEM;
@@ -82,9 +86,7 @@ void nfc_llc_unregister(const char *name)
if (llc_engine == NULL)
return;
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
+ nfc_llc_del_engine(llc_engine);
}
struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 819157bbb5..57a2f97004 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_RW) {
err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_MIUX) {
err = -EINVAL;
@@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
}
static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *new_sk;
@@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
goto error;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection. */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 0d26c8ec99..f456a5911e 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1463,6 +1463,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
ndev->ops->n_core_ops);
}
+static bool nci_valid_size(struct sk_buff *skb)
+{ /* True only if @skb holds a full header plus the non-zero length that nci_plen() reports. */
+ BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); /* both sizes must match, so the single hdr_size below covers either */
+ unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
+
+ if (skb->len < hdr_size || /* shorter than a header: nci_plen() must not be trusted */
+ !nci_plen(skb->data) || /* header reports a zero length */
+ skb->len < hdr_size + nci_plen(skb->data)) { /* shorter than header + reported length */
+ return false;
+ }
+ return true;
+}
+
/* ---- NCI TX Data worker thread ---- */
static void nci_tx_work(struct work_struct *work)
@@ -1516,9 +1529,9 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
- if (!nci_plen(skb->data)) {
+ if (!nci_valid_size(skb)) {
kfree_skb(skb);
- break;
+ continue;
}
/* Process frame */
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index aa1dbf654c..dd2ce73a24 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -969,8 +969,7 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info)
int rc;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
- !info->attrs[NFC_ATTR_TARGET_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1018,8 +1017,7 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg = NULL;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
- !info->attrs[NFC_ATTR_FIRMWARE_NAME])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index f4a38bd6a7..bfb7758063 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop);
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int outer_hlen, mac_len, nsh_len;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
- unsigned int nsh_len, mac_len;
- __be16 proto;
+ __be16 outer_proto, proto;
skb_reset_network_header(skb);
+ outer_proto = skb->protocol;
+ outer_hlen = skb_mac_header_len(skb);
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
}
for (skb = segs; skb; skb = skb->next) {
- skb->protocol = htons(ETH_P_NSH);
- __skb_push(skb, nsh_len);
- skb->mac_header = mac_offset;
- skb->network_header = skb->mac_header + mac_len;
+ skb->protocol = outer_proto;
+ __skb_push(skb, nsh_len + outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, outer_hlen);
skb->mac_len = mac_len;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca8..9642255808 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -936,6 +936,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
pskb_trim(skb, ovs_mac_header_len(key));
}
+ /* Need to set the pkt_type to involve the routing layer. The
+ * packet movement through the OVS datapath doesn't generally
+ * use routing, but this is needed for tunnel cases.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 2928c142a2..3b980bf277 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 11c69415c6..99d72543ab 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -15,7 +15,6 @@
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
-#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 33b21a0c05..8a848ce72e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*/
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
+ memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
+
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ebc5728aab..f224d9bcea 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -152,6 +152,13 @@ static void update_range(struct sw_flow_match *match,
sizeof((match)->key->field)); \
} while (0)
+#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ /* copy an nbits-wide bitmap from value_p into field of the key, or of the mask key when is_mask is set */ \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ bitmap_size(nbits), is_mask); \
+ bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \
+ value_p, nbits); \
+})
+
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
@@ -670,8 +677,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
bool log)
{
bool ttl = false, ipv4 = false, ipv6 = false;
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
bool info_bridge_mode = false;
- __be16 tun_flags = 0;
int opts_type = 0;
struct nlattr *a;
int rem;
@@ -697,7 +704,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_ID:
SW_FLOW_KEY_PUT(match, tun_key.tun_id,
nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
@@ -729,10 +736,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_TP_SRC:
SW_FLOW_KEY_PUT(match, tun_key.tp_src,
@@ -743,7 +750,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
nla_get_be16(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_OAM:
- tun_flags |= TUNNEL_OAM;
+ __set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
if (opts_type) {
@@ -755,7 +762,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
@@ -768,7 +775,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
@@ -784,7 +791,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
@@ -798,7 +805,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
}
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
+ __IP_TUNNEL_FLAG_NUM, is_mask);
if (is_mask)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
else
@@ -823,13 +831,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
if (ipv4) {
if (info_bridge_mode) {
+ __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+
if (match->key->tun_key.u.ipv4.src ||
match->key->tun_key.u.ipv4.dst ||
match->key->tun_key.tp_src ||
match->key->tun_key.tp_dst ||
match->key->tun_key.ttl ||
match->key->tun_key.tos ||
- tun_flags & ~TUNNEL_KEY) {
+ !ip_tunnel_flags_empty(tun_flags)) {
OVS_NLERR(log, "IPv4 tun info is not correct");
return -EINVAL;
}
@@ -874,7 +884,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len,
unsigned short tun_proto, u8 mode)
{
- if (output->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
OVS_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -910,10 +920,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (output->tp_src &&
@@ -922,18 +932,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if (output->tp_dst &&
nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (swkey_tun_opts_len) {
- if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
+ output->tun_flags) &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -2029,7 +2041,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
- if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+ if (ip_tunnel_is_options_present(output->tun_key.tun_flags))
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
@@ -2752,7 +2764,8 @@ static int validate_geneve_opts(struct sw_flow_key *key)
opts_len -= len;
}
- key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ if (crit_opt)
+ __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);
return 0;
}
@@ -2760,6 +2773,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
+ IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
struct sw_flow_match match;
struct sw_flow_key key;
struct metadata_dst *tun_dst;
@@ -2767,9 +2781,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
int err = 0, start, opts_type;
- __be16 dst_opt_type;
- dst_opt_type = 0;
ovs_match_init(&match, &key, true, NULL);
opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
@@ -2781,13 +2793,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
err = validate_geneve_opts(&key);
if (err < 0)
return err;
- dst_opt_type = TUNNEL_GENEVE_OPT;
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
- dst_opt_type = TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- dst_opt_type = TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
break;
}
}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index ed11cd12b5..8bbf983cd2 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
-#include <linux/genetlink.h>
#include <linux/skbuff.h>
#include <linux/bits.h>
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
index 3eb35d9eb7..74d75aaebe 100644
--- a/net/openvswitch/openvswitch_trace.h
+++ b/net/openvswitch/openvswitch_trace.h
@@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
@@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 903537a5da..91a11067e4 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -82,6 +82,13 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
err = -ENODEV;
goto error_free_vport;
}
+ /* Ensure that the device exists and that the provided
+ * name is not one of its aliases.
+ */
+ if (strcmp(name, ovs_vport_name(vport))) {
+ err = -ENODEV;
+ goto error_put;
+ }
netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
if (vport->dev->flags & IFF_LOOPBACK ||
(vport->dev->type != ARPHRD_ETHER &&
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e6a8701a38..4692a9ef11 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -538,6 +538,61 @@ static void *packet_current_frame(struct packet_sock *po,
return packet_lookup_frame(po, rb, rb->head, status);
}
+static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev)
+{ /* Returns, in host byte order, the TCI of the VLAN header read header_len bytes past the MAC header; 0 if it cannot be determined. */
+ u8 *skb_orig_data = skb->data;
+ int skb_orig_len = skb->len;
+ struct vlan_hdr vhdr, *vh;
+ unsigned int header_len;
+
+ if (!dev)
+ return 0;
+
+ /* In the SOCK_DGRAM scenario, skb data starts at the network
+ * protocol, which is after the VLAN headers. The outer VLAN
+ * header is at the hard_header_len offset in non-variable
+ * length link layer headers. If it's a VLAN device, the
+ * min_header_len should be used to exclude the VLAN header
+ * size.
+ */
+ if (dev->min_header_len == dev->hard_header_len)
+ header_len = dev->hard_header_len;
+ else if (is_vlan_dev(dev))
+ header_len = dev->min_header_len;
+ else
+ return 0; /* neither case applies: no offset to read from, report no TCI */
+
+ skb_push(skb, skb->data - skb_mac_header(skb)); /* temporarily rewind data to the MAC header */
+ vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr);
+ if (skb_orig_data != skb->data) { /* undo the push so the caller's view of the skb is unchanged */
+ skb->data = skb_orig_data;
+ skb->len = skb_orig_len;
+ }
+ if (unlikely(!vh)) /* header could not be read from the skb */
+ return 0;
+
+ return ntohs(vh->h_vlan_TCI);
+}
+
+static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
+{ /* Protocol to report for @skb: skb->protocol, or for a VLAN ethertype whatever __vlan_get_protocol() resolves from the MAC header. */
+ __be16 proto = skb->protocol;
+
+ if (unlikely(eth_type_vlan(proto))) {
+ u8 *skb_orig_data = skb->data;
+ int skb_orig_len = skb->len;
+
+ skb_push(skb, skb->data - skb_mac_header(skb)); /* temporarily rewind data to the MAC header */
+ proto = __vlan_get_protocol(skb, proto, NULL);
+ if (skb_orig_data != skb->data) { /* undo the push so the caller's view of the skb is unchanged */
+ skb->data = skb_orig_data;
+ skb->len = skb_orig_len;
+ }
+ }
+
+ return proto;
+}
+
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
del_timer_sync(&pkc->retire_blk_timer);
@@ -1007,10 +1062,16 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
struct tpacket3_hdr *ppd)
{
+ struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
+
if (skb_vlan_tag_present(pkc->skb)) {
ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
+ ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev);
+ ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
+ ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
ppd->hv1.tp_vlan_tci = 0;
ppd->hv1.tp_vlan_tpid = 0;
@@ -2318,7 +2379,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
if (po->tp_version <= TPACKET_V2) {
if (macoff + snaplen > po->rx_ring.frame_size) {
- if (po->copy_thresh &&
+ if (READ_ONCE(po->copy_thresh) &&
atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
if (skb_shared(skb)) {
copy_skb = skb_clone(skb, GFP_ATOMIC);
@@ -2428,6 +2489,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
+ h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev);
+ h.h2->tp_vlan_tpid = ntohs(skb->protocol);
+ status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
h.h2->tp_vlan_tci = 0;
h.h2->tp_vlan_tpid = 0;
@@ -2457,7 +2522,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
+ vlan_get_protocol_dgram(skb) : skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
@@ -2522,8 +2588,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
- if (!packet_read_pending(&po->tx_ring))
- complete(&po->skb_completion);
+ complete(&po->skb_completion);
}
sock_wfree(skb);
@@ -3483,7 +3548,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* Original length was stored in sockaddr_ll fields */
origlen = PACKET_SKB_CB(skb)->sa.origlen;
sll->sll_family = AF_PACKET;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
+ vlan_get_protocol_dgram(skb) : skb->protocol;
}
sock_recv_cmsgs(msg, sk, skb);
@@ -3540,6 +3606,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
aux.tp_vlan_tci = skb_vlan_tag_get(skb);
aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex);
+ if (dev) {
+ aux.tp_vlan_tci = vlan_get_tci(skb, dev);
+ aux.tp_vlan_tpid = ntohs(skb->protocol);
+ aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
+ } else {
+ aux.tp_vlan_tci = 0;
+ aux.tp_vlan_tpid = 0;
+ }
+ rcu_read_unlock();
} else {
aux.tp_vlan_tci = 0;
aux.tp_vlan_tpid = 0;
@@ -3800,28 +3881,30 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
case PACKET_TX_RING:
{
union tpacket_req_u req_u;
- int len;
+ ret = -EINVAL;
lock_sock(sk);
switch (po->tp_version) {
case TPACKET_V1:
case TPACKET_V2:
- len = sizeof(req_u.req);
+ if (optlen < sizeof(req_u.req))
+ break;
+ ret = copy_from_sockptr(&req_u.req, optval,
+ sizeof(req_u.req)) ?
+ -EINVAL : 0;
break;
case TPACKET_V3:
default:
- len = sizeof(req_u.req3);
+ if (optlen < sizeof(req_u.req3))
+ break;
+ ret = copy_from_sockptr(&req_u.req3, optval,
+ sizeof(req_u.req3)) ?
+ -EINVAL : 0;
break;
}
- if (optlen < len) {
- ret = -EINVAL;
- } else {
- if (copy_from_sockptr(&req_u.req, optval, len))
- ret = -EFAULT;
- else
- ret = packet_set_ring(sk, &req_u, 0,
- optname == PACKET_TX_RING);
- }
+ if (!ret)
+ ret = packet_set_ring(sk, &req_u, 0,
+ optname == PACKET_TX_RING);
release_sock(sk);
return ret;
}
@@ -3834,7 +3917,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- pkt_sk(sk)->copy_thresh = val;
+ WRITE_ONCE(pkt_sk(sk)->copy_thresh, val);
return 0;
}
case PACKET_VERSION:
@@ -4088,6 +4171,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
case PACKET_VNET_HDR_SZ:
val = READ_ONCE(po->vnet_hdr_sz);
break;
+ case PACKET_COPY_THRESH:
+ val = READ_ONCE(pkt_sk(sk)->copy_thresh);
+ break;
case PACKET_VERSION:
val = po->tp_version;
break;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 9a7980e330..47f69f3dbf 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -17,7 +17,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_index = po->ifindex;
pinfo.pdi_version = po->tp_version;
pinfo.pdi_reserve = po->tp_reserve;
- pinfo.pdi_copy_thresh = po->copy_thresh;
+ pinfo.pdi_copy_thresh = READ_ONCE(po->copy_thresh);
pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp);
pinfo.pdi_flags = 0;
@@ -245,6 +245,7 @@ static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler packet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_PACKET,
.dump = packet_diag_handler_dump,
};
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3dd5f52bc1..53a858478e 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
sock_put(sk);
}
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
- bool kern)
+static struct sock *pep_sock_accept(struct sock *sk,
+ struct proto_accept_arg *arg)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
@@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- errp);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb)
return NULL;
@@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
/* Create a new to-be-accepted sock */
newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
- kern);
+ arg->kern);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
@@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
drop:
release_sock(sk);
kfree_skb(skb);
- *errp = err;
+ arg->err = err;
return newsk;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 59aebe2968..7008d40249 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -178,7 +178,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
if (nla_put_u8(skb, RTA_DST, dst) ||
- nla_put_u32(skb, RTA_OIF, dev->ifindex))
+ nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) +
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -263,6 +263,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ int err = 0;
u8 addr;
rcu_read_lock();
@@ -272,16 +273,16 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
continue;
- if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0)
- goto out;
+ err = fill_route(skb, dev, addr << 2,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE);
+ if (err < 0)
+ break;
}
-
-out:
rcu_read_unlock();
cb->args[0] = addr;
- return skb->len;
+ return err;
}
int __init phonet_netlink_register(void)
@@ -301,6 +302,6 @@ int __init phonet_netlink_register(void)
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
route_doit, NULL, 0);
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
- NULL, route_dumpit, 0);
+ NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED);
return 0;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1018340d89..5ce0b3ee5d 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -292,18 +292,17 @@ out:
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
- int err;
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
- newsk = sk->sk_prot->accept(sk, flags, &err, kern);
+ newsk = sk->sk_prot->accept(sk, arg);
if (!newsk)
- return err;
+ return arg->err;
lock_sock(newsk);
sock_graft(newsk, newsock);
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 0d0bf41381..82fc22467a 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -81,7 +81,6 @@ static struct ctl_table phonet_table[] = {
.mode = 0644,
.proc_handler = proc_local_port_range,
},
- { }
};
int __init phonet_sysctl_init(void)
diff --git a/net/psample/psample.c b/net/psample/psample.c
index ddd211a151..a5d9b8446f 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -221,7 +221,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
- if (tun_key->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags) &&
nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
PSAMPLE_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -257,10 +257,10 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (tun_key->tp_src &&
@@ -269,15 +269,16 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
if (tun_key->tp_dst &&
nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags) &&
nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (tun_opts_len) {
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags) &&
nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_key->tun_flags) &&
nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -314,7 +315,7 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
int tun_opts_len = tun_info->options_len;
int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */
- if (tun_key->tun_flags & TUNNEL_KEY)
+ if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags))
sum += nla_total_size_64bit(sizeof(u64));
if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
@@ -337,20 +338,21 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
if (tun_key->tos)
sum += nla_total_size(sizeof(u8));
sum += nla_total_size(sizeof(u8)); /* TTL */
- if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
- if (tun_key->tun_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
if (tun_key->tp_src)
sum += nla_total_size(sizeof(u16));
if (tun_key->tp_dst)
sum += nla_total_size(sizeof(u16));
- if (tun_key->tun_flags & TUNNEL_OAM)
+ if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags))
sum += nla_total_size(0);
if (tun_opts_len) {
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags))
sum += nla_total_size(tun_opts_len);
- else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_key->tun_flags))
sum += nla_total_size(tun_opts_len);
}
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 9ced13c062..69f53625a0 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -118,6 +118,51 @@ static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
};
MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
+static int __maybe_unused qcom_mhi_qrtr_pm_suspend_late(struct device *dev)
+{
+ struct mhi_device *mhi_dev = container_of(dev, struct mhi_device, dev);
+ enum mhi_state state;
+
+ state = mhi_get_mhi_state(mhi_dev->mhi_cntrl);
+ /*
+ * If the device is in suspend state, then no need for the
+ * client driver to unprepare the channels.
+ */
+ if (state == MHI_STATE_M3)
+ return 0;
+
+ mhi_unprepare_from_transfer(mhi_dev);
+
+ return 0;
+}
+
+static int __maybe_unused qcom_mhi_qrtr_pm_resume_early(struct device *dev)
+{
+ struct mhi_device *mhi_dev = container_of(dev, struct mhi_device, dev);
+ enum mhi_state state;
+ int rc;
+
+ state = mhi_get_mhi_state(mhi_dev->mhi_cntrl);
+ /*
+ * If the device is in suspend state, we won't unprepare channels
+ * in suspend callback, therefore no need to prepare channels when
+ * resume.
+ */
+ if (state == MHI_STATE_M3)
+ return 0;
+
+ rc = mhi_prepare_for_transfer_autoqueue(mhi_dev);
+ if (rc)
+ dev_err(dev, "failed to prepare for autoqueue transfer %d\n", rc);
+
+ return rc;
+}
+
+static const struct dev_pm_ops qcom_mhi_qrtr_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(qcom_mhi_qrtr_pm_suspend_late,
+ qcom_mhi_qrtr_pm_resume_early)
+};
+
static struct mhi_driver qcom_mhi_qrtr_driver = {
.probe = qcom_mhi_qrtr_probe,
.remove = qcom_mhi_qrtr_remove,
@@ -126,6 +171,7 @@ static struct mhi_driver qcom_mhi_qrtr_driver = {
.id_table = qcom_mhi_qrtr_id_table,
.driver = {
.name = "qcom_mhi_qrtr",
+ .pm = &qcom_mhi_qrtr_pm_ops,
},
};
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index abb0c70ffc..654a3cc0d3 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -725,6 +725,24 @@ int qrtr_ns_init(void)
if (ret < 0)
goto err_wq;
+ /* As the qrtr ns socket owner and creator is the same module, we have
+ * to decrease the qrtr module reference count to guarantee that it
+ * remains zero after the ns socket is created, otherwise, executing
+ * "rmmod" command is unable to make the qrtr module deleted after the
+ * qrtr module is inserted successfully.
+ *
+ * However, the reference count is increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of qrtr socket's proto_ops struct; another is to increment the
+ * reference count of owner of qrtr proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(qrtr_ns.sock->ops->owner);
+ module_put(qrtr_ns.sock->sk->sk_prot_creator->owner);
+
return 0;
err_wq:
@@ -739,6 +757,15 @@ void qrtr_ns_remove(void)
{
cancel_work_sync(&qrtr_ns.work);
destroy_workqueue(qrtr_ns.workqueue);
+
+ /* sock_release() expects the two references that were put during
+ * qrtr_ns_init(). This function is only called during module remove,
+ * so try_stop_module() has already set the refcnt to 0. Use
+ * __module_get() instead of try_module_get() to successfully take two
+ * references.
+ */
+ __module_get(qrtr_ns.sock->ops->owner);
+ __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner);
sock_release(qrtr_ns.sock);
}
EXPORT_SYMBOL_GPL(qrtr_ns_remove);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index b4cc699c5f..c749c5525b 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -829,9 +829,7 @@ int rds_conn_init(void)
if (ret)
return ret;
- rds_conn_slab = kmem_cache_create("rds_connection",
- sizeof(struct rds_connection),
- 0, 0, NULL);
+ rds_conn_slab = KMEM_CACHE(rds_connection, 0);
if (!rds_conn_slab) {
rds_loop_net_exit();
return -ENOMEM;
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index e4e41b3afc..2af678e71e 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -103,7 +103,6 @@ static struct ctl_table rds_ib_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_ib_sysctl_exit(void)
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index e381bbcd9c..025f518a43 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -89,7 +89,6 @@ static struct ctl_table rds_sysctl_rds_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_sysctl_exit(void)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2dba7505b4..d8111ac83b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -86,7 +86,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
.proc_handler = rds_tcp_skbuf_handler,
.extra1 = &rds_tcp_min_rcvbuf,
},
- { }
};
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 05008ce5c4..d89bd8d0c3 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock)
int conn_state;
struct rds_conn_path *cp;
struct in6_addr *my_addr, *peer_addr;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ .kern = true,
+ };
#if !IS_ENABLED(CONFIG_IPV6)
struct in6_addr saddr, daddr;
#endif
@@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock)
if (ret)
goto out;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 4e32d65952..84529886c2 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -156,14 +156,12 @@ err_destroy:
return ret;
}
-static int rfkill_gpio_remove(struct platform_device *pdev)
+static void rfkill_gpio_remove(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
rfkill_unregister(rfkill->rfkill_dev);
rfkill_destroy(rfkill->rfkill_dev);
-
- return 0;
}
#ifdef CONFIG_ACPI
@@ -183,7 +181,7 @@ MODULE_DEVICE_TABLE(of, rfkill_of_match);
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
- .remove = rfkill_gpio_remove,
+ .remove_new = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ef81d019b2..59050caab6 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -919,8 +919,8 @@ out_release:
return err;
}
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rose_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index d391d7758f..d801315b70 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -112,7 +112,6 @@ static struct ctl_table rose_table[] = {
.extra1 = &min_window,
.extra2 = &max_window
},
- { }
};
void __init rose_register_sysctl(void)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 465bfe5eb0..f4844683e1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
@@ -487,7 +487,7 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
* rxrpc_kernel_set_max_life - Set maximum lifespan on a call
* @sock: The socket the call is on
* @call: The call to configure
- * @hard_timeout: The maximum lifespan of the call in jiffies
+ * @hard_timeout: The maximum lifespan of the call in ms
*
* Set the maximum lifespan of a call. The call will end with ETIME or
* ETIMEDOUT if it takes longer than this.
@@ -495,14 +495,14 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
unsigned long hard_timeout)
{
- unsigned long now;
+ ktime_t delay = ms_to_ktime(hard_timeout), expect_term_by;
mutex_lock(&call->user_mutex);
- now = jiffies;
- hard_timeout += now;
- WRITE_ONCE(call->expect_term_by, hard_timeout);
- rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+ expect_term_by = ktime_add(ktime_get_real(), delay);
+ WRITE_ONCE(call->expect_term_by, expect_term_by);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
mutex_unlock(&call->user_mutex);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7818aae1be..08de24658f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -198,8 +198,8 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
- struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
union {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
struct {
u16 offset; /* Offset of data */
u16 len; /* Length of data */
@@ -208,9 +208,12 @@ struct rxrpc_skb_priv {
};
struct {
rxrpc_seq_t first_ack; /* First packet in acks table */
+ rxrpc_seq_t prev_ack; /* Highest seq seen */
+ rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */
+ u8 reason; /* Reason for ack */
u8 nr_acks; /* Number of acks+nacks */
u8 nr_nacks; /* Number of nacks */
- };
+ } ack;
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -248,10 +251,9 @@ struct rxrpc_security {
struct rxrpc_key_token *);
/* Work out how much data we can store in a packet, given an estimate
- * of the amount of data remaining.
+ * of the amount of data remaining and allocate a data buffer.
*/
- int (*how_much_data)(struct rxrpc_call *, size_t,
- size_t *, size_t *, size_t *);
+ struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remaining, gfp_t gfp);
/* impose security on a packet */
int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);
@@ -292,6 +294,7 @@ struct rxrpc_local {
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
+ struct page_frag_cache tx_alloc; /* Tx control packet allocation (I/O thread only) */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
struct sk_buff_head rx_delay_queue; /* Delay injection queue */
@@ -352,8 +355,8 @@ struct rxrpc_peer {
u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
u32 rttvar_us; /* smoothed mdev_max */
- u32 rto_j; /* Retransmission timeout in jiffies */
- u8 backoff; /* Backoff timeout */
+ u32 rto_us; /* Retransmission timeout in usec */
+ u8 backoff; /* Backoff timeout (as shift) */
u8 cong_ssthresh; /* Congestion slow-start threshold */
};
@@ -500,6 +503,8 @@ struct rxrpc_connection {
struct list_head proc_link; /* link in procfs list */
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct page_frag_cache tx_data_alloc; /* Tx DATA packet allocation */
+ struct mutex tx_data_alloc_lock;
struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
@@ -618,17 +623,17 @@ struct rxrpc_call {
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
struct sockaddr_rxrpc dest_srx; /* Destination address */
- unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
- unsigned long ack_lost_at; /* When ACK is figured as lost */
- unsigned long resend_at; /* When next resend needs to happen */
- unsigned long ping_at; /* When next to send a ping */
- unsigned long keepalive_at; /* When next to send a keepalive ping */
- unsigned long expect_rx_by; /* When we expect to get a packet by */
- unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
- unsigned long expect_term_by; /* When we expect call termination by */
- u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
- u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
- u32 hard_timo; /* Maximum lifetime or 0 (jif) */
+ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */
+ ktime_t ack_lost_at; /* When ACK is figured as lost */
+ ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t ping_at; /* When next to send a ping */
+ ktime_t keepalive_at; /* When next to send a keepalive ping */
+ ktime_t expect_rx_by; /* When we expect to get a packet by */
+ ktime_t expect_req_by; /* When we expect to get a request DATA packet by */
+ ktime_t expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (ms) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (ms) */
+ u32 hard_timo; /* Maximum lifetime or 0 (s) */
struct timer_list timer; /* Combined event timer */
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -673,7 +678,7 @@ struct rxrpc_call {
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
- u16 tx_backoff; /* Delay to insert due to Tx failure */
+ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */
u8 tx_winsize; /* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW 128
ktime_t tx_last_sent; /* Last time a transmission occurred */
@@ -692,7 +697,7 @@ struct rxrpc_call {
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
-#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
+#define RXRPC_MIN_CWND 4
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
@@ -788,40 +793,30 @@ struct rxrpc_send_params {
* Buffer of data to be output as a packet.
*/
struct rxrpc_txbuf {
- struct rcu_head rcu;
struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
+ rxrpc_serial_t serial; /* Last serial number transmitted with */
unsigned int call_debug_id;
unsigned int debug_id;
unsigned int len; /* Amount of data in buffer */
unsigned int space; /* Remaining data space */
unsigned int offset; /* Offset of fill point */
- unsigned long flags;
-#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */
-#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */
+ unsigned int flags;
+#define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */
+#define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */
+ __be16 cksum; /* Checksum to go in header */
+ unsigned short ack_rwind; /* ACK receive window */
u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
- struct {
- /* The packet for encrypting and DMA'ing. We align it such
- * that data[] aligns correctly for any crypto blocksize.
- */
- u8 pad[64 - sizeof(struct rxrpc_wire_header)];
- struct rxrpc_wire_header wire; /* Network-ready header */
- union {
- u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
- struct {
- struct rxrpc_ackpacket ack;
- DECLARE_FLEX_ARRAY(u8, acks);
- };
- };
- } __aligned(64);
+ u8 nr_kvec; /* Amount of kvec[] used */
+ struct kvec kvec[3];
};
static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
{
- return txb->wire.flags & RXRPC_CLIENT_INITIATED;
+ return txb->flags & RXRPC_CLIENT_INITIATED;
}
static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
@@ -869,17 +864,11 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
*/
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why);
-void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why);
-
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
@@ -1160,9 +1149,9 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
int rxrpc_send_abort_packet(struct rxrpc_call *);
-int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
@@ -1223,7 +1212,7 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
*/
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans);
void rxrpc_peer_init_rtt(struct rxrpc_peer *);
/*
@@ -1295,8 +1284,9 @@ static inline void rxrpc_sysctl_exit(void) {}
* txbuf.c
*/
extern atomic_t rxrpc_nr_txbuf;
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size);
void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0f78544d04..7bbb685047 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -23,14 +23,14 @@
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long now = jiffies;
- unsigned long ping_at = now + rxrpc_idle_ack_delay;
-
- if (time_before(ping_at, call->ping_at)) {
- WRITE_ONCE(call->ping_at, ping_at);
- rxrpc_reduce_call_timer(call, ping_at, now,
- rxrpc_timer_set_for_ping);
- trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay));
+ ktime_t now = ktime_get_real();
+ ktime_t ping_at = ktime_add(now, delay);
+
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ if (ktime_before(ping_at, call->ping_at)) {
+ call->ping_at = ping_at;
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_ping);
}
}
@@ -40,62 +40,18 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long expiry = rxrpc_soft_ack_delay;
- unsigned long now = jiffies, ack_at;
-
- if (rxrpc_soft_ack_delay < expiry)
- expiry = rxrpc_soft_ack_delay;
- if (call->peer->srtt_us != 0)
- ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
- else
- ack_at = expiry;
-
- ack_at += READ_ONCE(call->tx_backoff);
- ack_at += now;
- if (time_before(ack_at, call->delay_ack_at)) {
- WRITE_ONCE(call->delay_ack_at, ack_at);
- rxrpc_reduce_call_timer(call, ack_at, now,
- rxrpc_timer_set_for_ack);
- }
+ ktime_t now = ktime_get_real(), delay;
trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
-}
-
-/*
- * Queue an ACK for immediate transmission.
- */
-void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
- rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
-{
- struct rxrpc_txbuf *txb;
-
- if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return;
-
- rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
- rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
- if (!txb) {
- kleave(" = -ENOMEM");
- return;
- }
+ if (call->peer->srtt_us)
+ delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC;
+ else
+ delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay));
+ delay = ktime_add_ms(delay, call->tx_backoff); /* tx_backoff is in ms */
- txb->ack_why = why;
- txb->wire.seq = 0;
- txb->wire.type = RXRPC_PACKET_TYPE_ACK;
- txb->wire.flags |= RXRPC_SLOW_START_OK;
- txb->ack.bufferSpace = 0;
- txb->ack.maxSkew = 0;
- txb->ack.firstPacket = 0;
- txb->ack.previousPacket = 0;
- txb->ack.serial = htonl(serial);
- txb->ack.reason = ack_reason;
- txb->ack.nAcks = 0;
-
- trace_rxrpc_send_ack(call, why, ack_reason, serial);
- rxrpc_send_ack_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+ call->delay_ack_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -114,25 +70,19 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
struct rxrpc_ackpacket *ack = NULL;
struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
- unsigned long resend_at;
- rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
- ktime_t now, max_age, oldest, ack_ts;
- bool unacked = false;
+ rxrpc_seq_t transmitted = call->tx_transmitted;
+ ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
+ ktime_t resend_at = KTIME_MAX, now, delay;
+ bool unacked = false, did_send = false;
unsigned int i;
- LIST_HEAD(retrans_queue);
_enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
now = ktime_get_real();
- max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
- oldest = now;
if (list_empty(&call->tx_buffer))
goto no_resend;
- if (list_empty(&call->tx_buffer))
- goto no_further_resend;
-
trace_rxrpc_resend(call, ack_skb);
txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
@@ -143,12 +93,12 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = sp->first_ack + i;
+ seq = sp->ack.first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -160,19 +110,23 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
goto no_further_resend;
found_txb:
- if (after(ntohl(txb->wire.serial), call->acks_highest_serial))
+ resend_at = ktime_add(txb->last_sent, rto);
+ if (after(txb->serial, call->acks_highest_serial)) {
+ if (ktime_after(resend_at, now) &&
+ ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue; /* Ack point not yet reached */
+ }
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- }
+ trace_rxrpc_retransmit(call, txb->seq, txb->serial,
+ ktime_sub(resend_at, now));
- trace_rxrpc_retransmit(call, txb->seq,
- ktime_to_ns(ktime_sub(txb->last_sent,
- max_age)));
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ now = ktime_get_real();
if (list_is_last(&txb->call_link, &call->tx_buffer))
goto no_further_resend;
@@ -184,43 +138,46 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* seen. Anything between the soft-ACK table and that point will get
* ACK'd or NACK'd in due course, so don't worry about it here; here we
* need to consider retransmitting anything beyond that point.
- *
- * Note that ACK for a packet can beat the update of tx_transmitted.
*/
- if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted)))
+ if (after_eq(call->acks_prev_seq, call->tx_transmitted))
goto no_further_resend;
list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
- if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq)))
+ resend_at = ktime_add(txb->last_sent, rto);
+
+ if (before_eq(txb->seq, call->acks_prev_seq))
continue;
- if (after(txb->seq, READ_ONCE(call->tx_transmitted)))
+ if (after(txb->seq, call->tx_transmitted))
break; /* Not transmitted yet */
if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
- before(ntohl(txb->wire.serial), ntohl(ack->serial)))
+ before(txb->serial, ntohl(ack->serial)))
goto do_resend; /* Wasn't accounted for by a more recent ping. */
- if (ktime_after(txb->last_sent, max_age)) {
- if (ktime_before(txb->last_sent, oldest))
- oldest = txb->last_sent;
+ if (ktime_after(resend_at, now)) {
+ if (ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue;
}
do_resend:
unacked = true;
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
- }
+
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+ now = ktime_get_real();
}
no_further_resend:
no_resend:
- resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
- !list_empty(&retrans_queue));
- WRITE_ONCE(call->resend_at, resend_at);
+ if (resend_at < KTIME_MAX) {
+ delay = rxrpc_get_rto_backoff(call->peer, did_send);
+ resend_at = ktime_add(resend_at, delay);
+ trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset);
+ }
+ call->resend_at = resend_at;
if (unacked)
rxrpc_congestion_timeout(call);
@@ -229,25 +186,15 @@ no_resend:
* that an ACK got lost somewhere. Send a ping to find out instead of
* retransmitting data.
*/
- if (list_empty(&retrans_queue)) {
- rxrpc_reduce_call_timer(call, resend_at, jiffies,
- rxrpc_timer_set_for_resend);
- ack_ts = ktime_sub(now, call->acks_latest_ts);
- if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
- goto out;
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_lost_ack);
- goto out;
- }
+ if (!did_send) {
+ ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
+ call->peer->srtt_us >> 3);
- /* Retransmit the queue */
- while ((txb = list_first_entry_or_null(&retrans_queue,
- struct rxrpc_txbuf, tx_link))) {
- list_del_init(&txb->tx_link);
- rxrpc_transmit_one(call, txb);
+ if (ktime_sub(next_ping, now) <= 0)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_0_retrans);
}
-out:
_leave("");
}
@@ -257,13 +204,11 @@ out:
*/
static void rxrpc_begin_service_reply(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
-
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -320,7 +265,7 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
- if (txb->wire.flags & RXRPC_LAST_PACKET)
+ if (txb->flags & RXRPC_LAST_PACKET)
rxrpc_close_tx_phase(call);
rxrpc_transmit_one(call, txb);
@@ -372,8 +317,8 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
*/
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
- unsigned long now, next, t;
- bool resend = false, expired = false;
+ ktime_t now, t;
+ bool resend = false;
s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
@@ -397,70 +342,73 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
/* If we see our async-event poke, check for timeout trippage. */
- now = jiffies;
- t = READ_ONCE(call->expect_rx_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
- expired = true;
+ now = ktime_get_real();
+ t = ktime_sub(call->expect_rx_by, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_expect_rx);
+ goto expired;
}
- t = READ_ONCE(call->expect_req_by);
- if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
- time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
- expired = true;
+ t = ktime_sub(call->expect_req_by, now);
+ if (t <= 0) {
+ call->expect_req_by = KTIME_MAX;
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_idle);
+ goto expired;
+ }
}
- t = READ_ONCE(call->expect_term_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
- expired = true;
+ t = ktime_sub(READ_ONCE(call->expect_term_by), now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_hard);
+ goto expired;
}
- t = READ_ONCE(call->delay_ack_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
- cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->delay_ack_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_delayed_ack);
+ call->delay_ack_at = KTIME_MAX;
rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
- rxrpc_propose_ack_ping_for_lost_ack);
+ rxrpc_propose_ack_delayed_ack);
}
- t = READ_ONCE(call->ack_lost_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
- cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ack_lost_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
+ call->ack_lost_at = KTIME_MAX;
set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
}
- t = READ_ONCE(call->keepalive_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
- cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ping_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);
+ call->ping_at = KTIME_MAX;
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_keepalive);
}
- t = READ_ONCE(call->ping_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
- cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_keepalive);
- }
-
- t = READ_ONCE(call->resend_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
- cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->resend_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
+ call->resend_at = KTIME_MAX;
resend = true;
}
- if (skb)
- rxrpc_input_call_packet(call, skb);
-
rxrpc_transmit_some_data(call);
+ now = ktime_get_real();
+ t = ktime_sub(call->keepalive_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_keepalive);
+ call->keepalive_at = KTIME_MAX;
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
+ }
+
if (skb) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -472,24 +420,13 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_initial_ping(call);
/* Process events */
- if (expired) {
- if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
- (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
- trace_rxrpc_call_reset(call);
- rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
- rxrpc_abort_call_reset);
- } else {
- rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
- rxrpc_abort_call_timeout);
- }
- goto out;
- }
-
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend &&
+ __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
+ !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
@@ -511,23 +448,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
- next = call->expect_rx_by;
+ ktime_t next = READ_ONCE(call->expect_term_by), delay;
-#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+#define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; }
set(call->expect_req_by);
- set(call->expect_term_by);
+ set(call->expect_rx_by);
set(call->delay_ack_at);
set(call->ack_lost_at);
set(call->resend_at);
set(call->keepalive_at);
set(call->ping_at);
- now = jiffies;
- if (time_after_eq(now, next))
+ now = ktime_get_real();
+ delay = ktime_sub(next, now);
+ if (delay <= 0) {
rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
-
- rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ } else {
+ unsigned long nowj = jiffies, delayj, nextj;
+
+ delayj = max(nsecs_to_jiffies(delay), 1);
+ nextj = nowj + delayj;
+ if (time_before(nextj, call->timer.expires) ||
+ !timer_pending(&call->timer)) {
+ trace_rxrpc_timer_restart(call, delay, delayj);
+ timer_reduce(&call->timer, nextj);
+ }
+ }
}
out:
@@ -542,4 +489,16 @@ out:
rxrpc_shrink_call_tx_buffer(call);
_leave("");
return true;
+
+expired:
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
+ } else {
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
+ }
+ goto out;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 9fc9a6c3f6..f9e983a12c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -70,20 +70,11 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
if (!__rxrpc_call_is_complete(call)) {
- trace_rxrpc_timer_expired(call, jiffies);
+ trace_rxrpc_timer_expired(call);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
}
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
-
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
static void rxrpc_destroy_call(struct work_struct *);
@@ -163,24 +154,27 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
refcount_set(&call->ref, 1);
- call->debug_id = debug_id;
- call->tx_total_len = -1;
- call->next_rx_timo = 20 * HZ;
- call->next_req_timo = 1 * HZ;
- call->ackr_window = 1;
- call->ackr_wtop = 1;
+ call->debug_id = debug_id;
+ call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
+ call->delay_ack_at = KTIME_MAX;
+ call->ack_lost_at = KTIME_MAX;
+ call->resend_at = KTIME_MAX;
+ call->ping_at = KTIME_MAX;
+ call->keepalive_at = KTIME_MAX;
+ call->expect_rx_by = KTIME_MAX;
+ call->expect_req_by = KTIME_MAX;
+ call->expect_term_by = KTIME_MAX;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
- if (RXRPC_TX_SMSS > 2190)
- call->cong_cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- call->cong_cwnd = 3;
- else
- call->cong_cwnd = 4;
+ call->cong_cwnd = RXRPC_MIN_CWND;
call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
call->rxnet = rxnet;
@@ -226,11 +220,11 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
__set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
if (p->timeouts.normal)
- call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL);
+ call->next_rx_timo = min(p->timeouts.normal, 1);
if (p->timeouts.idle)
- call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL);
+ call->next_req_timo = min(p->timeouts.idle, 1);
if (p->timeouts.hard)
- call->hard_timo = p->timeouts.hard * HZ;
+ call->hard_timo = p->timeouts.hard;
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
@@ -253,18 +247,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
*/
void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
- unsigned long j = now + MAX_JIFFY_OFFSET;
-
- call->delay_ack_at = j;
- call->ack_lost_at = j;
- call->resend_at = j;
- call->ping_at = j;
- call->keepalive_at = j;
- call->expect_rx_by = j;
- call->expect_req_by = j;
- call->expect_term_by = j + call->hard_timo;
- call->timer.expires = now;
+ if (call->hard_timo) {
+ ktime_t delay = ms_to_ktime(call->hard_timo * 1000);
+
+ call->expect_term_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ }
+ call->timer.expires = jiffies;
}
/*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3b9b267a44..d25bf1cf36 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -636,7 +636,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
unsigned long final_ack_at = jiffies + 2;
- WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ chan->final_ack_at = final_ack_at;
smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
rxrpc_reduce_conn_timer(conn, final_ack_at);
@@ -770,7 +770,7 @@ next:
conn_expires_at = conn->idle_timestamp + expiry;
- now = READ_ONCE(jiffies);
+ now = jiffies;
if (time_after(conn_expires_at, now))
goto not_yet_expired;
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 1f251d758c..598b4ee389 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -88,7 +88,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
- struct rxrpc_ackinfo ack_info;
+ struct rxrpc_acktrailer trailer;
size_t len;
int ret, ioc;
u32 serial, mtu, call_id, padding;
@@ -122,8 +122,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len = sizeof(pkt.whdr);
iov[1].iov_base = &padding;
iov[1].iov_len = 3;
- iov[2].iov_base = &ack_info;
- iov[2].iov_len = sizeof(ack_info);
+ iov[2].iov_base = &trailer;
+ iov[2].iov_len = sizeof(trailer);
serial = rxrpc_get_next_serial(conn);
@@ -158,14 +158,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- ack_info.rxMTU = htonl(rxrpc_rx_mtu);
- ack_info.maxMTU = htonl(mtu);
- ack_info.rwind = htonl(rxrpc_rx_window_size);
- ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ trailer.maxMTU = htonl(rxrpc_rx_mtu);
+ trailer.ifMTU = htonl(mtu);
+ trailer.rwind = htonl(rxrpc_rx_window_size);
+ trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
- len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ len += sizeof(pkt.ack) + 3 + sizeof(trailer);
ioc = 3;
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index df8a271948..1539d315af 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -68,6 +68,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
mutex_init(&conn->security_lock);
+ mutex_init(&conn->tx_data_alloc_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -118,18 +119,13 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo
switch (srx->transport.family) {
case AF_INET:
if (peer->srx.transport.sin.sin_port !=
- srx->transport.sin.sin_port ||
- peer->srx.transport.sin.sin_addr.s_addr !=
- srx->transport.sin.sin_addr.s_addr)
+ srx->transport.sin.sin_port)
goto not_found;
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
if (peer->srx.transport.sin6.sin6_port !=
- srx->transport.sin6.sin6_port ||
- memcmp(&peer->srx.transport.sin6.sin6_addr,
- &srx->transport.sin6.sin6_addr,
- sizeof(struct in6_addr)) != 0)
+ srx->transport.sin6.sin6_port)
goto not_found;
break;
#endif
@@ -341,6 +337,9 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
+ if (conn->tx_data_alloc.va)
+ __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
+ conn->tx_data_alloc.pagecnt_bias);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9691de00ad..16d49a861d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,6 +9,17 @@
#include "ar-internal.h"
+/* Override priority when generating ACKs for received DATA */
+static const u8 rxrpc_ack_priority[RXRPC_ACK__INVALID] = {
+ [RXRPC_ACK_IDLE] = 1,
+ [RXRPC_ACK_DELAY] = 2,
+ [RXRPC_ACK_REQUESTED] = 3,
+ [RXRPC_ACK_DUPLICATE] = 4,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 5,
+ [RXRPC_ACK_NOSPACE] = 6,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 7,
+};
+
static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
enum rxrpc_abort_reason why)
{
@@ -212,7 +223,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
+ if (txb->flags & RXRPC_LAST_PACKET) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
@@ -252,6 +263,9 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ call->resend_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
+
if (unlikely(call->cong_last_nack)) {
rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
call->cong_last_nack = NULL;
@@ -288,15 +302,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
struct rxrpc_ack_summary summary = { 0 };
- unsigned long now, timo;
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
- now = jiffies;
- timo = now + MAX_JIFFY_OFFSET;
-
- WRITE_ONCE(call->delay_ack_at, timo);
- trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
@@ -329,7 +339,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
case RXRPC_CALL_SERVER_RECV_REQUEST:
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ call->expect_req_by = KTIME_MAX;
rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
break;
@@ -366,7 +376,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
* Process a DATA packet.
*/
static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
- bool *_notify)
+ bool *_notify, rxrpc_serial_t *_ack_serial, int *_ack_reason)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
@@ -419,8 +429,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
- else
- call->ackr_nr_unacked++;
window++;
if (after(window, wtop)) {
@@ -498,12 +506,16 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
}
send_ack:
- if (ack_reason >= 0)
- rxrpc_send_ACK(call, ack_reason, serial,
- rxrpc_propose_ack_input_data);
- else
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_input_data);
+ if (ack_reason >= 0) {
+ if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*_ack_reason]) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ } else if (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*_ack_reason] &&
+ ack_reason == RXRPC_ACK_REQUESTED) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ }
+ }
}
/*
@@ -514,9 +526,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
struct rxrpc_jumbo_header jhdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp;
struct sk_buff *jskb;
+ rxrpc_serial_t ack_serial = 0;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
bool notify = false;
+ int ack_reason = 0;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -536,7 +550,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
jsp = rxrpc_skb(jskb);
jsp->offset = offset;
jsp->len = RXRPC_JUMBO_DATALEN;
- rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_input_data_one(call, jskb, &notify, &ack_serial, &ack_reason);
rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
sp->hdr.flags = jhdr.flags;
@@ -549,7 +563,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->offset = offset;
sp->len = len;
- rxrpc_input_data_one(call, skb, &notify);
+ rxrpc_input_data_one(call, skb, &notify, &ack_serial, &ack_reason);
+
+ if (ack_reason > 0) {
+ rxrpc_send_ACK(call, ack_reason, ack_serial,
+ rxrpc_propose_ack_input_data);
+ } else {
+ call->ackr_nr_unacked++;
+ rxrpc_propose_delay_ACK(call, sp->hdr.serial,
+ rxrpc_propose_ack_input_data);
+ }
if (notify) {
trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
rxrpc_notify_socket(call);
@@ -589,14 +612,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
- unsigned long now, expect_req_by;
if (timo) {
- now = jiffies;
- expect_req_by = now + timo;
- WRITE_ONCE(call->expect_req_by, expect_req_by);
- rxrpc_reduce_call_timer(call, expect_req_by, now,
- rxrpc_timer_set_for_idle);
+ ktime_t delay = ms_to_ktime(timo);
+
+ call->expect_req_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_idle);
}
break;
}
@@ -670,14 +691,14 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
/*
* Process the extra information that may be appended to an ACK packet
*/
-static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
- struct rxrpc_ackinfo *ackinfo)
+static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb,
+ struct rxrpc_acktrailer *trailer)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
bool wake = false;
- u32 rwind = ntohl(ackinfo->rwind);
+ u32 rwind = ntohl(trailer->rwind);
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
@@ -688,10 +709,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
call->tx_winsize = rwind;
}
- if (call->cong_ssthresh > rwind)
- call->cong_ssthresh = rwind;
-
- mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+ mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
peer = call->peer;
if (mtu < peer->maxdata) {
@@ -713,20 +731,19 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
rxrpc_seq_t seq)
{
struct sk_buff *skb = call->cong_last_nack;
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int i, new_acks = 0, retained_nacks = 0;
- rxrpc_seq_t old_seq = sp->first_ack;
- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+ rxrpc_seq_t old_seq = sp->ack.first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- if (after_eq(seq, old_seq + sp->nr_acks)) {
- summary->nr_new_acks += sp->nr_nacks;
- summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ if (after_eq(seq, old_seq + sp->ack.nr_acks)) {
+ summary->nr_new_acks += sp->ack.nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks);
summary->nr_retained_nacks = 0;
} else if (seq == old_seq) {
- summary->nr_retained_nacks = sp->nr_nacks;
+ summary->nr_retained_nacks = sp->ack.nr_nacks;
} else {
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_NACK) {
if (before(old_seq + i, seq))
new_acks++;
@@ -739,7 +756,7 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
summary->nr_retained_nacks = retained_nacks;
}
- return old_seq + sp->nr_acks;
+ return old_seq + sp->ack.nr_acks;
}
/*
@@ -759,10 +776,10 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int i, old_nacks = 0;
- rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
if (after_eq(seq, since))
@@ -774,7 +791,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
old_nacks++;
} else {
summary->nr_new_nacks++;
- sp->nr_nacks++;
+ sp->ack.nr_nacks++;
}
if (before(seq, lowest_nak))
@@ -835,38 +852,32 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_ackinfo info;
+ struct rxrpc_acktrailer trailer;
rxrpc_serial_t ack_serial, acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
- offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
- offset += sizeof(ack);
-
- ack_serial = sp->hdr.serial;
- acked_serial = ntohl(ack.serial);
- first_soft_ack = ntohl(ack.firstPacket);
- prev_pkt = ntohl(ack.previousPacket);
- hard_ack = first_soft_ack - 1;
- nr_acks = ack.nAcks;
- sp->first_ack = first_soft_ack;
- sp->nr_acks = nr_acks;
- summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
- ack.reason : RXRPC_ACK__INVALID);
+ offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
+
+ ack_serial = sp->hdr.serial;
+ acked_serial = sp->ack.acked_serial;
+ first_soft_ack = sp->ack.first_ack;
+ prev_pkt = sp->ack.prev_ack;
+ nr_acks = sp->ack.nr_acks;
+ hard_ack = first_soft_ack - 1;
+ summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
+ sp->ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
- rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
if (acked_serial != 0) {
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING_RESPONSE:
rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
rxrpc_rtt_rx_ping_response);
@@ -886,7 +897,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicates that the client address changed due to NAT. The server
* lost the call because it switched to a different peer.
*/
- if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
rxrpc_is_client_call(call)) {
@@ -899,7 +910,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicate a change of address. However, we can retransmit the call
* if we still have it buffered to the beginning.
*/
- if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
call->acks_hard_ack == 0 &&
@@ -917,11 +928,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto send_response;
}
- info.rxMTU = 0;
+ trailer.maxMTU = 0;
ioffset = offset + nr_acks + 3;
- if (skb->len >= ioffset + sizeof(info) &&
- skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
+ if (skb->len >= ioffset + sizeof(trailer) &&
+ skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer);
if (nr_acks > 0)
skb_condense(skb);
@@ -940,7 +951,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING:
break;
default:
@@ -950,8 +961,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
/* Parse rwind and mtu sizes if provided. */
- if (info.rxMTU)
- rxrpc_input_ackinfo(call, skb, &info);
+ if (trailer.maxMTU)
+ rxrpc_input_ack_trailer(call, skb, &trailer);
if (first_soft_ack == 0)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
@@ -997,7 +1008,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_congestion_management(call, skb, &summary, acked_serial);
send_response:
- if (ack.reason == RXRPC_ACK_PING)
+ if (summary.ack_reason == RXRPC_ACK_PING)
rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
rxrpc_propose_ack_respond_to_ping);
else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
@@ -1048,12 +1059,10 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
timo = READ_ONCE(call->next_rx_timo);
if (timo) {
- unsigned long now = jiffies, expect_rx_by;
+ ktime_t delay = ms_to_ktime(timo);
- expect_rx_by = now + timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, now,
- rxrpc_timer_set_for_normal);
+ call->expect_rx_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
}
switch (sp->hdr.type) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 34353b6e58..6716c021a5 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -15,14 +15,11 @@ static int none_init_connection_security(struct rxrpc_connection *conn,
}
/*
- * Work out how much data we can put in an unsecured packet.
+ * Allocate an appropriately sized buffer for the amount of data remaining.
*/
-static int none_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- *_offset = 0;
- return 0;
+ return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp);
}
static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
@@ -79,7 +76,7 @@ const struct rxrpc_security rxrpc_no_security = {
.exit = none_exit,
.init_connection_security = none_init_connection_security,
.free_call_crypto = none_free_call_crypto,
- .how_much_data = none_how_much_data,
+ .alloc_txbuf = none_alloc_txbuf,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
.respond_to_challenge = none_respond_to_challenge,
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 4a3a08a0e2..0300baa9af 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -124,6 +124,7 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
+ struct rxrpc_ackpacket ack;
/* dig out the RxRPC connection details */
if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
@@ -141,6 +142,16 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(whdr), &ack, sizeof(ack)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_ack);
+ sp->ack.first_ack = ntohl(ack.firstPacket);
+ sp->ack.prev_ack = ntohl(ack.previousPacket);
+ sp->ack.acked_serial = ntohl(ack.serial);
+ sp->ack.reason = ack.reason;
+ sp->ack.nr_acks = ack.nAcks;
+ }
return true;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 34d3073681..504453c688 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,6 +452,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
+ if (local->tx_alloc.va)
+ __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
+ local->tx_alloc.pagecnt_bias);
}
/*
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 825b811830..657cf35089 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -17,22 +17,22 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned long rxrpc_soft_ack_delay = HZ;
+unsigned long rxrpc_soft_ack_delay = 1000;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned long rxrpc_idle_ack_delay = HZ / 2;
+unsigned long rxrpc_idle_ack_delay = 500;
/*
* Receive window size in packets. This indicates the maximum number of
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 4a292f860a..5ea9601efd 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -48,12 +48,10 @@ static const char rxrpc_keepalive_string[] = "";
static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
{
if (ret < 0) {
- u16 tx_backoff = READ_ONCE(call->tx_backoff);
-
- if (tx_backoff < HZ)
- WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ if (call->tx_backoff < 1000)
+ call->tx_backoff += 100;
} else {
- WRITE_ONCE(call->tx_backoff, 0);
+ call->tx_backoff = 0;
}
}
@@ -65,84 +63,92 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
* Receiving a response to the ping will prevent the ->expect_rx_by timer from
* expiring.
*/
-static void rxrpc_set_keepalive(struct rxrpc_call *call)
+static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
{
- unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);
- keepalive_at += now;
- WRITE_ONCE(call->keepalive_at, keepalive_at);
- rxrpc_reduce_call_timer(call, keepalive_at, now,
- rxrpc_timer_set_for_keepalive);
+ call->keepalive_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
}
/*
* Fill out an ACK packet.
*/
-static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- struct rxrpc_txbuf *txb,
- u16 *_rwind)
+static void rxrpc_fill_out_ack(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ u8 ack_reason,
+ rxrpc_serial_t serial)
{
- struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
unsigned int qsize, sack, wrap, to;
rxrpc_seq_t window, wtop;
int rsize;
u32 mtu, jmax;
- u8 *ackp = txb->acks;
+ u8 *filler = txb->kvec[2].iov_base;
+ u8 *sackp = txb->kvec[1].iov_base;
- call->ackr_nr_unacked = 0;
- atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
- clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
window = call->ackr_window;
wtop = call->ackr_wtop;
sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
- txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = wtop - window;
+
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ txb->flags |= RXRPC_SLOW_START_OK;
+ ack->bufferSpace = 0;
+ ack->maxSkew = 0;
+ ack->firstPacket = htonl(window);
+ ack->previousPacket = htonl(call->rx_highest_seq);
+ ack->serial = htonl(serial);
+ ack->reason = ack_reason;
+ ack->nAcks = wtop - window;
+ filler[0] = 0;
+ filler[1] = 0;
+ filler[2] = 0;
+
+ if (ack_reason == RXRPC_ACK_PING)
+ txb->flags |= RXRPC_REQUEST_ACK;
if (after(wtop, window)) {
+ txb->len += ack->nAcks;
+ txb->kvec[1].iov_base = sackp;
+ txb->kvec[1].iov_len = ack->nAcks;
+
wrap = RXRPC_SACK_SIZE - sack;
- to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
+ to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE);
- if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
+ if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
} else {
- memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
- memcpy(txb->acks + wrap, call->ackr_sack_table,
- to - wrap);
+ memcpy(sackp, call->ackr_sack_table + sack, wrap);
+ memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
}
-
- ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
- } else if (txb->ack.reason == RXRPC_ACK_DELAY) {
- txb->ack.reason = RXRPC_ACK_IDLE;
+ } else if (ack->reason == RXRPC_ACK_DELAY) {
+ ack->reason = RXRPC_ACK_IDLE;
}
- mtu = conn->peer->if_mtu;
- mtu -= conn->peer->hdrsize;
+ mtu = call->peer->if_mtu;
+ mtu -= call->peer->hdrsize;
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
- *_rwind = rsize;
- ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
- ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(rsize);
- ackinfo.jumbo_max = htonl(jmax);
-
- *ackp++ = 0;
- *ackp++ = 0;
- *ackp++ = 0;
- memcpy(ackp, &ackinfo, sizeof(ackinfo));
- return txb->ack.nAcks + 3 + sizeof(ackinfo);
+ txb->ack_rwind = rsize;
+ trailer->maxMTU = htonl(rxrpc_rx_mtu);
+ trailer->ifMTU = htonl(mtu);
+ trailer->rwind = htonl(rsize);
+ trailer->jumbo_max = htonl(jmax);
}
/*
* Record the beginning of an RTT probe.
*/
-static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
- enum rxrpc_rtt_tx_trace why)
+static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
+ ktime_t now, enum rxrpc_rtt_tx_trace why)
{
unsigned long avail = call->rtt_avail;
int rtt_slot = 9;
@@ -155,47 +161,31 @@ static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
goto no_slot;
call->rtt_serial[rtt_slot] = serial;
- call->rtt_sent_at[rtt_slot] = ktime_get_real();
+ call->rtt_sent_at[rtt_slot] = now;
smp_wmb(); /* Write data before avail bit */
set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
- return rtt_slot;
+ return;
no_slot:
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
- return -1;
-}
-
-/*
- * Cancel an RTT probe.
- */
-static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
- rxrpc_serial_t serial, int rtt_slot)
-{
- if (rtt_slot != -1) {
- clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
- smp_wmb(); /* Clear pending bit before setting slot */
- set_bit(rtt_slot, &call->rtt_avail);
- trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial);
- }
}
/*
* Transmit an ACK packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
struct rxrpc_connection *conn;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len, n;
- int ret, rtt_slot = -1;
- u16 rwind;
+ ktime_t now;
+ int ret;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return -ECONNRESET;
+ return;
conn = call->conn;
@@ -203,55 +193,68 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- if (txb->ack.reason == RXRPC_ACK_PING)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
-
- n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
- if (n == 0)
- return 0;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
- len = iov[0].iov_len;
+ msg.msg_flags = MSG_SPLICE_PAGES;
- serial = rxrpc_get_next_serial(conn);
- txb->wire.serial = htonl(serial);
- trace_rxrpc_tx_ack(call->debug_id, serial,
- ntohl(txb->ack.firstPacket),
- ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
- rwind);
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
- if (txb->ack.reason == RXRPC_ACK_PING)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
+ txb->serial = rxrpc_get_next_serial(conn);
+ whdr->serial = htonl(txb->serial);
+ trace_rxrpc_tx_ack(call->debug_id, txb->serial,
+ ntohl(ack->firstPacket),
+ ntohl(ack->serial), ack->reason, ack->nAcks,
+ txb->ack_rwind);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
- /* Grab the highest received seq as late as possible */
- txb->ack.previousPacket = htonl(call->rx_highest_seq);
-
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len);
+ rxrpc_local_dont_fragment(conn->local, false);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0) {
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret,
rxrpc_tx_point_call_ack);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ trace_rxrpc_tx_packet(call->debug_id, whdr,
rxrpc_tx_point_call_ack);
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- call->peer->rtt_last_req = ktime_get_real();
+ now = ktime_get_real();
+ if (ack->reason == RXRPC_ACK_PING)
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping);
+ if (txb->flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = now;
+ rxrpc_set_keepalive(call, now);
}
rxrpc_tx_backoff(call, ret);
+}
- if (!__rxrpc_call_is_complete(call)) {
- if (ret < 0)
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- rxrpc_set_keepalive(call);
+/*
+ * Queue an ACK for immediate transmission.
+ */
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return;
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
+
+ txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window);
+ if (!txb) {
+ kleave(" = -ENOMEM");
+ return;
}
- return ret;
+ txb->ack_why = why;
+
+ rxrpc_fill_out_ack(call, txb, ack_reason, serial);
+ call->ackr_nr_unacked = 0;
+ atomic_set(&call->ackr_nr_consumed, 0);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
+
+ trace_rxrpc_send_ack(call, why, ack_reason, serial);
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
/*
@@ -319,38 +322,22 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
}
/*
- * send a packet through the transport endpoint
+ * Prepare a (sub)packet for transmission.
*/
-int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb,
+ rxrpc_serial_t serial)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
enum rxrpc_req_ack_trace why;
struct rxrpc_connection *conn = call->conn;
- struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len;
- int ret, rtt_slot = -1;
_enter("%x,{%d}", txb->seq, txb->len);
- /* Each transmission of a Tx packet needs a new serial number */
- serial = rxrpc_get_next_serial(conn);
- txb->wire.serial = htonl(serial);
+ txb->serial = serial;
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
txb->seq == 1)
- txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + txb->len;
- len = iov[0].iov_len;
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
-
- msg.msg_name = &call->peer->srx.transport;
- msg.msg_namelen = call->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
@@ -359,13 +346,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
* service call, lest OpenAFS incorrectly send us an ACK with some
* soft-ACKs in it and then never follow up with a proper hard ACK.
*/
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ if (txb->flags & RXRPC_REQUEST_ACK)
why = rxrpc_reqack_already_on;
- else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb))
+ else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb))
why = rxrpc_reqack_no_srv_last;
else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
why = rxrpc_reqack_ack_lost;
- else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags))
+ else if (txb->flags & RXRPC_TXBUF_RESENT)
why = rxrpc_reqack_retrans;
else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
why = rxrpc_reqack_slow_start;
@@ -381,42 +368,116 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
if (why != rxrpc_reqack_no_srv_last)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
+ txb->flags |= RXRPC_REQUEST_ACK;
dont_set_request_ack:
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
+ whdr->serial = htonl(txb->serial);
+ whdr->cksum = txb->cksum;
+
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false);
+}
+
+/*
+ * Prepare a packet for transmission.
+ */
+static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ rxrpc_serial_t serial;
+
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = rxrpc_get_next_serial(call->conn);
+
+ rxrpc_prepare_data_subpacket(call, txb, serial);
+
+ return txb->len;
+}
+
+/*
+ * Set timeouts after transmitting a packet.
+ */
+static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ ktime_t now = ktime_get_real();
+ bool ack_requested = txb->flags & RXRPC_REQUEST_ACK;
+
+ call->tx_last_sent = now;
+ txb->last_sent = now;
+
+ if (ack_requested) {
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data);
+
+ call->peer->rtt_last_req = now;
+ if (call->peer->rtt_count > 1) {
+ ktime_t delay = rxrpc_get_rto_backoff(call->peer, false);
+
+ call->ack_lost_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
+ }
+ }
+
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
+
+ call->expect_rx_by = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
+ }
+
+ rxrpc_set_keepalive(call, now);
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_connection *conn = call->conn;
+ enum rxrpc_tx_point frag;
+ struct msghdr msg;
+ size_t len;
+ int ret;
+
+ _enter("%x,{%d}", txb->seq, txb->len);
+
+ len = rxrpc_prepare_data_packet(call, txb);
+
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- trace_rxrpc_tx_data(call, txb->seq, serial,
- txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags),
- true);
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial,
+ txb->flags, true);
goto done;
}
}
- trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len);
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_SPLICE_PAGES;
/* Track what we've attempted to transmit at least once so that the
* retransmission algorithm doesn't try to resend what we haven't sent
- * yet. However, this can race as we can receive an ACK before we get
- * to this point. But, OTOH, if we won't get an ACK mentioning this
- * packet unless the far side received it (though it could have
- * discarded it anyway and NAK'd it).
+ * yet.
*/
- cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
+ if (txb->seq == call->tx_transmitted + 1)
+ call->tx_transmitted = txb->seq;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (txb->len >= call->peer->maxdata)
- goto send_fragmentable;
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
+ if (txb->len >= call->peer->maxdata) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ } else {
+ rxrpc_local_dont_fragment(conn->local, true);
+ frag = rxrpc_tx_point_call_data_nofrag;
+ }
+retry:
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
@@ -429,46 +490,21 @@ dont_set_request_ack:
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
}
rxrpc_tx_backoff(call, ret);
- if (ret == -EMSGSIZE)
- goto send_fragmentable;
+ if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ goto retry;
+ }
done:
if (ret >= 0) {
- call->tx_last_sent = txb->last_sent;
- if (txb->wire.flags & RXRPC_REQUEST_ACK) {
- call->peer->rtt_last_req = txb->last_sent;
- if (call->peer->rtt_count > 1) {
- unsigned long nowj = jiffies, ack_lost_at;
-
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
- ack_lost_at += nowj;
- WRITE_ONCE(call->ack_lost_at, ack_lost_at);
- rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
- rxrpc_timer_set_for_lost_ack);
- }
- }
-
- if (txb->seq == 1 &&
- !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
- &call->flags)) {
- unsigned long nowj = jiffies, expect_rx_by;
-
- expect_rx_by = nowj + call->next_rx_timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
- rxrpc_timer_set_for_normal);
- }
-
- rxrpc_set_keepalive(call);
+ rxrpc_tstamp_data_packets(call, txb);
} else {
/* Cancel the call if the initial transmission fails,
* particularly if that's due to network routing issues that
@@ -482,41 +518,6 @@ done:
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
-
-send_fragmentable:
- /* attempt to send this message with fragmentation enabled */
- _debug("send fragment");
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
-
- switch (conn->local->srx.transport.family) {
- case AF_INET6:
- case AF_INET:
- rxrpc_local_dont_fragment(conn->local, false);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
- conn->peer->last_tx_at = ktime_get_seconds();
-
- rxrpc_local_dont_fragment(conn->local, true);
- break;
-
- default:
- BUG();
- }
-
- if (ret < 0) {
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_frag);
- } else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_frag);
- }
- rxrpc_tx_backoff(call, ret);
- goto done;
}
/*
@@ -723,11 +724,9 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_instant_resend(call, txb);
}
} else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
+ ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
+ call->resend_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 26dc2f26d9..263a2251e3 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -52,9 +52,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
enum rxrpc_call_state state;
- unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
+ long timeout = 0;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -76,10 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
state = rxrpc_call_state(call);
- if (state != RXRPC_CALL_SERVER_PREALLOC) {
- timeout = READ_ONCE(call->expect_rx_by);
- timeout -= jiffies;
- }
+ if (state != RXRPC_CALL_SERVER_PREALLOC)
+ timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real());
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
seq_printf(seq,
@@ -309,7 +307,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
peer->mtu,
now - peer->last_tx_at,
peer->srtt_us >> 3,
- jiffies_to_usecs(peer->rto_j));
+ peer->rto_us);
return 0;
}
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index e8ee4af43c..4fe6b4d20a 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -135,9 +135,9 @@ struct rxrpc_ackpacket {
/*
* ACK packets can have a further piece of information tagged on the end
*/
-struct rxrpc_ackinfo {
- __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
- __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
+struct rxrpc_acktrailer {
+ __be32 maxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
+ __be32 ifMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
__be32 rwind; /* Rx window size (packets) [AFS 3.4] */
__be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
};
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
index be61d6f5be..cdab7b7d08 100644
--- a/net/rxrpc/rtt.c
+++ b/net/rxrpc/rtt.c
@@ -11,8 +11,8 @@
#include <linux/net.h>
#include "ar-internal.h"
-#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
-#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
+#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
@@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
- return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+ return (peer->srtt_us >> 3) + peer->rttvar_us;
}
static u32 rxrpc_bound_rto(u32 rto)
@@ -124,7 +124,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer)
/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- peer->rto_j = rxrpc_bound_rto(rto);
+ peer->rto_us = rxrpc_bound_rto(rto);
}
static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
@@ -163,33 +163,33 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
spin_unlock(&peer->rtt_input_lock);
trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
- peer->srtt_us >> 3, peer->rto_j);
+ peer->srtt_us >> 3, peer->rto_us);
}
/*
- * Get the retransmission timeout to set in jiffies, backing it off each time
- * we retransmit.
+ * Get the retransmission timeout to set in nanoseconds, backing it off each
+ * time we retransmit.
*/
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
- u64 timo_j;
- u8 backoff = READ_ONCE(peer->backoff);
+ u64 timo_us;
+ u32 backoff = READ_ONCE(peer->backoff);
- timo_j = peer->rto_j;
- timo_j <<= backoff;
- if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ timo_us = peer->rto_us;
+ timo_us <<= backoff;
+ if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
WRITE_ONCE(peer->backoff, backoff + 1);
- if (timo_j < 1)
- timo_j = 1;
+ if (timo_us < 1)
+ timo_us = 1;
- return timo_j;
+ return ns_to_ktime(timo_us * NSEC_PER_USEC);
}
void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
{
- peer->rto_j = RXRPC_TIMEOUT_INIT;
- peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->rto_us = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = RXRPC_TIMEOUT_INIT;
peer->backoff = 0;
//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 6b32d61d4c..48a1475e6b 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -145,16 +145,17 @@ error:
/*
* Work out how much data we can put in a packet.
*/
-static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- size_t shdr, buf_size, chunk;
+ struct rxrpc_txbuf *txb;
+ size_t shdr, space;
+
+ remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header));
switch (call->conn->security_level) {
default:
- buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- shdr = 0;
- goto out;
+ space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
+ return rxrpc_alloc_data_txbuf(call, space, 1, gfp);
case RXRPC_SECURITY_AUTH:
shdr = sizeof(struct rxkad_level1_hdr);
break;
@@ -163,17 +164,16 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
break;
}
- buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN);
+ space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr);
+ space = round_up(space, RXKAD_ALIGN);
- chunk = buf_size - shdr;
- if (remain < chunk)
- buf_size = round_up(shdr + remain, RXKAD_ALIGN);
+ txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp);
+ if (!txb)
+ return NULL;
-out:
- *_buf_size = buf_size;
- *_data_size = chunk;
- *_offset = shdr;
- return 0;
+ txb->offset += shdr;
+ txb->space -= shdr;
+ return txb;
}
/*
@@ -251,7 +251,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
struct skcipher_request *req)
{
- struct rxkad_level1_hdr *hdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level1_hdr *hdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -259,7 +260,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
hdr->data_size = htonl((u32)check << 16 | txb->len);
txb->len += sizeof(struct rxkad_level1_hdr);
@@ -267,14 +268,14 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg, txb->data, 8);
+ sg_init_one(&sg, hdr, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
@@ -293,7 +294,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr *rxkhdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -302,7 +304,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
rxkhdr->checksum = 0;
@@ -312,7 +314,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
@@ -320,7 +322,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- sg_init_one(&sg, txb->data, txb->len);
+ sg_init_one(&sg, rxkhdr, txb->len);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
@@ -362,9 +364,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= txb->seq & 0x3fffffff;
- crypto.buf[0] = txb->wire.callNumber;
+ crypto.buf[0] = htonl(call->call_id);
crypto.buf[1] = htonl(x);
sg_init_one(&sg, crypto.buf, 8);
@@ -378,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
- txb->wire.cksum = htons(y);
+ txb->cksum = htons(y);
switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
@@ -726,7 +728,6 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
rxrpc_local_dont_fragment(conn->local, false);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
- rxrpc_local_dont_fragment(conn->local, true);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
@@ -1256,7 +1257,7 @@ const struct rxrpc_security rxkad = {
.free_preparse_server_key = rxkad_free_preparse_server_key,
.destroy_server_key = rxkad_destroy_server_key,
.init_connection_security = rxkad_init_connection_security,
- .how_much_data = rxkad_how_much_data,
+ .alloc_txbuf = rxkad_alloc_txbuf,
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
.free_call_crypto = rxkad_free_call_crypto,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5677d5690a..894b8fa68e 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -240,7 +240,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_notify_end_tx_t notify_end_tx)
{
rxrpc_seq_t seq = txb->seq;
- bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
+ bool poke, last = txb->flags & RXRPC_LAST_PACKET;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
@@ -336,7 +336,7 @@ reload:
do {
if (!txb) {
- size_t remain, bufsize, chunk, offset;
+ size_t remain;
_debug("alloc");
@@ -348,23 +348,11 @@ reload:
* region (enc blocksize), but the trailer is not.
*/
remain = more ? INT_MAX : msg_data_left(msg);
- ret = call->conn->security->how_much_data(call, remain,
- &bufsize, &chunk, &offset);
- if (ret < 0)
+ txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation);
+ if (!txb) {
+ ret = -ENOMEM;
goto maybe_error;
-
- _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset);
-
- /* create a buffer that we can retain until it's ACK'd */
- ret = -ENOMEM;
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA,
- GFP_KERNEL);
- if (!txb)
- goto maybe_error;
-
- txb->offset = offset;
- txb->space -= offset;
- txb->space = min_t(size_t, chunk, txb->space);
+ }
}
_debug("append");
@@ -374,8 +362,8 @@ reload:
size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
_debug("add %zu", copy);
- if (!copy_from_iter_full(txb->data + txb->offset, copy,
- &msg->msg_iter))
+ if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset,
+ copy, &msg->msg_iter))
goto efault;
_debug("added");
txb->space -= copy;
@@ -394,18 +382,18 @@ reload:
/* add the packet to the send queue if it's now full */
if (!txb->space ||
(msg_data_left(msg) == 0 && !more)) {
- if (msg_data_left(msg) == 0 && !more) {
- txb->wire.flags |= RXRPC_LAST_PACKET;
- __set_bit(RXRPC_TXBUF_LAST, &txb->flags);
- }
+ if (msg_data_left(msg) == 0 && !more)
+ txb->flags |= RXRPC_LAST_PACKET;
else if (call->tx_top - call->acks_hard_ack <
call->tx_winsize)
- txb->wire.flags |= RXRPC_MORE_PACKETS;
+ txb->flags |= RXRPC_MORE_PACKETS;
ret = call->security->secure_packet(call, txb);
if (ret < 0)
goto out;
+ txb->kvec[0].iov_len += txb->len;
+ txb->len = txb->kvec[0].iov_len;
rxrpc_queue_packet(rx, call, txb, notify_end_tx);
txb = NULL;
}
@@ -621,7 +609,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call;
- unsigned long now, j;
bool dropped_lock = false;
int ret;
@@ -699,25 +686,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
switch (p.call.nr_timeouts) {
case 3:
- j = msecs_to_jiffies(p.call.timeouts.normal);
- if (p.call.timeouts.normal > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_rx_timo, j);
+ WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal);
fallthrough;
case 2:
- j = msecs_to_jiffies(p.call.timeouts.idle);
- if (p.call.timeouts.idle > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_req_timo, j);
+ WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle);
fallthrough;
case 1:
if (p.call.timeouts.hard > 0) {
- j = p.call.timeouts.hard * HZ;
- now = jiffies;
- j += now;
- WRITE_ONCE(call->expect_term_by, j);
- rxrpc_reduce_call_timer(call, j, now,
- rxrpc_timer_set_for_hard);
+ ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC);
+
+ WRITE_ONCE(call->expect_term_by,
+ ktime_add(p.call.timeouts.hard,
+ ktime_get_real()));
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
+
}
break;
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index ecaeb4ecfb..9bf9a1f6e4 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -15,6 +15,8 @@ static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
+static const unsigned long one_ms = 1;
+static const unsigned long max_ms = 1000;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
@@ -28,24 +30,24 @@ static const unsigned long max_500 = 500;
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
- /* Values measured in milliseconds but used in jiffies */
+ /* Values measured in milliseconds */
{
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_ack_delay",
.data = &rxrpc_idle_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_conn_expiry",
@@ -125,7 +127,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
- { }
};
int __init rxrpc_sysctl_init(void)
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d43be85123..c3913d8a50 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -14,45 +14,146 @@ static atomic_t rxrpc_txbuf_debug_ids;
atomic_t rxrpc_nr_txbuf;
/*
- * Allocate and partially initialise an I/O request structure.
+ * Allocate and partially initialise a data transmission buffer.
*/
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp)
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp)
{
+ struct rxrpc_wire_header *whdr;
struct rxrpc_txbuf *txb;
+ size_t total, hoff;
+ void *buf;
txb = kmalloc(sizeof(*txb), gfp);
- if (txb) {
- INIT_LIST_HEAD(&txb->call_link);
- INIT_LIST_HEAD(&txb->tx_link);
- refcount_set(&txb->ref, 1);
- txb->call_debug_id = call->debug_id;
- txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
- txb->space = sizeof(txb->data);
- txb->len = 0;
- txb->offset = 0;
- txb->flags = 0;
- txb->ack_why = 0;
- txb->seq = call->tx_prepared + 1;
- txb->wire.epoch = htonl(call->conn->proto.epoch);
- txb->wire.cid = htonl(call->cid);
- txb->wire.callNumber = htonl(call->call_id);
- txb->wire.seq = htonl(txb->seq);
- txb->wire.type = packet_type;
- txb->wire.flags = call->conn->out_clientflag;
- txb->wire.userStatus = 0;
- txb->wire.securityIndex = call->security_ix;
- txb->wire._rsvd = 0;
- txb->wire.serviceId = htons(call->dest_srx.srx_service);
-
- trace_rxrpc_txbuf(txb->debug_id,
- txb->call_debug_id, txb->seq, 1,
- packet_type == RXRPC_PACKET_TYPE_DATA ?
- rxrpc_txbuf_alloc_data :
- rxrpc_txbuf_alloc_ack);
- atomic_inc(&rxrpc_nr_txbuf);
+ if (!txb)
+ return NULL;
+
+ hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr);
+ total = hoff + sizeof(*whdr) + data_size;
+
+ data_align = umax(data_align, L1_CACHE_BYTES);
+ mutex_lock(&call->conn->tx_data_alloc_lock);
+ buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp,
+ data_align);
+ mutex_unlock(&call->conn->tx_data_alloc_lock);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ whdr = buf + hoff;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->last_sent = KTIME_MIN;
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = data_size;
+ txb->len = 0;
+ txb->offset = sizeof(*whdr);
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_why = 0;
+ txb->seq = call->tx_prepared + 1;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 1;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = htonl(txb->seq);
+ whdr->type = RXRPC_PACKET_TYPE_DATA;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_data);
+
+ atomic_inc(&rxrpc_nr_txbuf);
+ return txb;
+}
+
+/*
+ * Allocate and partially initialise an ACK packet.
+ */
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size)
+{
+ struct rxrpc_wire_header *whdr;
+ struct rxrpc_acktrailer *trailer;
+ struct rxrpc_ackpacket *ack;
+ struct rxrpc_txbuf *txb;
+ gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
+ void *buf, *buf2 = NULL;
+ u8 *filler;
+
+ txb = kmalloc(sizeof(*txb), gfp);
+ if (!txb)
+ return NULL;
+
+ buf = page_frag_alloc(&call->local->tx_alloc,
+ sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ if (sack_size) {
+ buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
+ if (!buf2) {
+ page_frag_free(buf);
+ kfree(txb);
+ return NULL;
+ }
}
+ whdr = buf;
+ ack = buf + sizeof(*whdr);
+ filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
+ trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = 0;
+ txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer);
+ txb->offset = 0;
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_rwind = 0;
+ txb->seq = 0;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 3;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack);
+ txb->kvec[1].iov_base = buf2;
+ txb->kvec[1].iov_len = sack_size;
+ txb->kvec[2].iov_base = filler;
+ txb->kvec[2].iov_len = 3 + sizeof(*trailer);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ get_page(virt_to_head_page(trailer));
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_ack);
+ atomic_inc(&rxrpc_nr_txbuf);
return txb;
}
@@ -71,12 +172,15 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what);
}
-static void rxrpc_free_txbuf(struct rcu_head *rcu)
+static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb)
{
- struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu);
+ int i;
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
rxrpc_txbuf_free);
+ for (i = 0; i < txb->nr_kvec; i++)
+ if (txb->kvec[i].iov_base)
+ page_frag_free(txb->kvec[i].iov_base);
kfree(txb);
atomic_dec(&rxrpc_nr_txbuf);
}
@@ -95,7 +199,7 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
dead = __refcount_dec_and_test(&txb->ref, &r);
trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what);
if (dead)
- call_rcu(&txb->rcu, rxrpc_free_txbuf);
+ rxrpc_free_txbuf(txb);
}
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 470c70deff..8180d0c12f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -737,16 +737,6 @@ config NET_ACT_SAMPLE
To compile this code as a module, choose M here: the
module will be called act_sample.
-config NET_ACT_IPT
- tristate "IPtables targets"
- depends on NET_CLS_ACT && NETFILTER && NETFILTER_XTABLES
- help
- Say Y here to be able to invoke iptables targets after successful
- classification.
-
- To compile this code as a module, choose M here: the
- module will be called act_ipt.
-
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3e30d72604..2520708b06 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
u32 max;
if (*index) {
-again:
rcu_read_lock();
p = idr_find(&idrinfo->action_idr, *index);
@@ -839,7 +838,7 @@ again:
* index but did not assign the pointer yet.
*/
rcu_read_unlock();
- goto again;
+ return -EAGAIN;
}
if (!p) {
@@ -1363,7 +1362,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
if (rtnl_held)
rtnl_unlock();
- request_module("act_%s", act_name);
+ request_module(NET_ACT_ALIAS_PREFIX "%s", act_name);
if (rtnl_held)
rtnl_lock();
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 6cfee66581..0e3cf11ae5 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -401,6 +401,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.size = sizeof(struct tcf_bpf),
};
+MODULE_ALIAS_NET_ACT("bpf");
static __net_init int bpf_init_net(struct net *net)
{
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f876275665..0fce631e7c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -242,6 +242,7 @@ static struct tc_action_ops act_connmark_ops = {
.cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
+MODULE_ALIAS_NET_ACT("connmark");
static __net_init int connmark_init_net(struct net *net)
{
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 7f8b1f2f2e..5cc8e407e7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -709,6 +709,7 @@ static struct tc_action_ops act_csum_ops = {
.offload_act_setup = tcf_csum_offload_act_setup,
.size = sizeof(struct tcf_csum),
};
+MODULE_ALIAS_NET_ACT("csum");
static __net_init int csum_init_net(struct net *net)
{
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 6124d8b128..9d451d77d5 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -41,21 +41,28 @@ static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
+struct zones_ht_key {
+ struct net *net;
+ u16 zone;
+ /* Note : pad[] must be the last field. */
+ u8 pad[];
+};
+
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
refcount_t ref;
- u16 zone;
+ struct zones_ht_key key;
bool dying;
};
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
- .key_offset = offsetof(struct tcf_ct_flow_table, zone),
- .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .key_offset = offsetof(struct tcf_ct_flow_table, key),
+ .key_len = offsetof(struct zones_ht_key, pad),
.automatic_shrinking = true,
};
@@ -316,11 +323,12 @@ static struct nf_flowtable_type flowtable_ct = {
static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
+ struct zones_ht_key key = { .net = net, .zone = params->zone };
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
mutex_lock(&zones_mutex);
- ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params);
if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
goto out_unlock;
@@ -329,7 +337,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
goto err_alloc;
refcount_set(&ct_ft->ref, 1);
- ct_ft->zone = params->zone;
+ ct_ft->key = key;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
if (err)
goto err_insert;
@@ -1071,6 +1079,14 @@ do_nat:
*/
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
goto drop;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
@@ -1600,6 +1616,7 @@ static struct tc_action_ops act_ct_ops = {
.offload_act_setup = tcf_ct_offload_act_setup,
.size = sizeof(struct tcf_ct),
};
+MODULE_ALIAS_NET_ACT("ct");
static __net_init int ct_init_net(struct net *net)
{
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index e620f9a84a..5dd41a0121 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -363,6 +363,7 @@ static struct tc_action_ops act_ctinfo_ops = {
.cleanup= tcf_ctinfo_cleanup,
.size = sizeof(struct tcf_ctinfo),
};
+MODULE_ALIAS_NET_ACT("ctinfo");
static __net_init int ctinfo_init_net(struct net *net)
{
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4af3b7ec24..e949280eb8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -296,6 +296,7 @@ static struct tc_action_ops act_gact_ops = {
.offload_act_setup = tcf_gact_offload_act_setup,
.size = sizeof(struct tcf_gact),
};
+MODULE_ALIAS_NET_ACT("gact");
static __net_init int gact_init_net(struct net *net)
{
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c681cd011a..1dd7412539 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -645,6 +645,7 @@ static struct tc_action_ops act_gate_ops = {
.offload_act_setup = tcf_gate_offload_act_setup,
.size = sizeof(struct tcf_gate),
};
+MODULE_ALIAS_NET_ACT("gate");
static __net_init int gate_init_net(struct net *net)
{
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 0e867d13be..107c6d83dc 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -889,6 +889,7 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.size = sizeof(struct tcf_ife_info),
};
+MODULE_ALIAS_NET_ACT("ife");
static __net_init int ife_init_net(struct net *net)
{
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6faa7d00da..5b38143659 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -635,6 +635,7 @@ static struct tc_action_ops act_mirred_ops = {
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
};
+MODULE_ALIAS_NET_ACT("mirred");
static __net_init int mirred_init_net(struct net *net)
{
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 34b8edb6cc..44a37a71ae 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -452,6 +452,7 @@ static struct tc_action_ops act_mpls_ops = {
.offload_act_setup = tcf_mpls_offload_act_setup,
.size = sizeof(struct tcf_mpls),
};
+MODULE_ALIAS_NET_ACT("mpls");
static __net_init int mpls_init_net(struct net *net)
{
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a180e72463..d541f55380 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -324,6 +324,7 @@ static struct tc_action_ops act_nat_ops = {
.cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
+MODULE_ALIAS_NET_ACT("nat");
static __net_init int nat_init_net(struct net *net)
{
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2ef22969f2..fc0a35a7b6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -515,11 +515,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
}
+ opt->nkeys = parms->tcfp_nkeys;
memcpy(opt->keys, parms->tcfp_keys,
flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
- opt->nkeys = parms->tcfp_nkeys;
opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
@@ -620,6 +620,7 @@ static struct tc_action_ops act_pedit_ops = {
.offload_act_setup = tcf_pedit_offload_act_setup,
.size = sizeof(struct tcf_pedit),
};
+MODULE_ALIAS_NET_ACT("pedit");
static __net_init int pedit_init_net(struct net *net)
{
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e119b4a3db..8555125ed3 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -502,6 +502,7 @@ static struct tc_action_ops act_police_ops = {
.offload_act_setup = tcf_police_offload_act_setup,
.size = sizeof(struct tcf_police),
};
+MODULE_ALIAS_NET_ACT("police");
static __net_init int police_init_net(struct net *net)
{
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index c5c61efe6d..a69b53d540 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -316,6 +316,7 @@ static struct tc_action_ops act_sample_ops = {
.offload_act_setup = tcf_sample_offload_act_setup,
.size = sizeof(struct tcf_sample),
};
+MODULE_ALIAS_NET_ACT("sample");
static __net_init int sample_init_net(struct net *net)
{
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 0a3e928882..f3abe05459 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -209,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.size = sizeof(struct tcf_defact),
};
+MODULE_ALIAS_NET_ACT("simple");
static __net_init int simp_init_net(struct net *net)
{
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 754f78b35b..1f1d9ce3e9 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -426,6 +426,7 @@ static struct tc_action_ops act_skbedit_ops = {
.offload_act_setup = tcf_skbedit_offload_act_setup,
.size = sizeof(struct tcf_skbedit),
};
+MODULE_ALIAS_NET_ACT("skbedit");
static __net_init int skbedit_init_net(struct net *net)
{
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 0015393910..cd0accaf84 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -287,6 +287,7 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.size = sizeof(struct tcf_skbmod),
};
+MODULE_ALIAS_NET_ACT("skbmod");
static __net_init int skbmod_init_net(struct net *net)
{
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 300b08aa82..af7c998459 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -230,7 +230,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
nla_for_each_attr(attr, head, len, rem) {
switch (nla_type(attr)) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT) {
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) {
NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
return -EINVAL;
}
@@ -247,7 +247,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
dst_len -= opt_len;
dst += opt_len;
}
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
if (type) {
@@ -259,7 +259,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
if (type) {
@@ -271,7 +271,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
}
}
@@ -302,7 +302,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
switch (nla_type(nla_data(nla))) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -310,7 +310,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
#endif
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -318,7 +318,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
#endif
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
#if IS_ENABLED(CONFIG_INET)
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
@@ -363,6 +363,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *metadata = NULL;
struct tcf_chain *goto_ch = NULL;
struct tc_tunnel_key *parm;
@@ -371,7 +372,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
__be16 dst_port = 0;
__be64 key_id = 0;
int opts_len = 0;
- __be16 flags = 0;
u8 tos, ttl;
int ret = 0;
u32 index;
@@ -412,16 +412,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
key_id = key32_to_tunnel_id(key32);
- flags = TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
}
- flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
- flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, flags);
if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG]))
- flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags);
if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
@@ -663,15 +663,15 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
if (!start)
return -EMSGSIZE;
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_geneve_opts_dump(skb, info);
if (err)
goto err_out;
- } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_vxlan_opts_dump(skb, info);
if (err)
goto err_out;
- } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
err = tunnel_key_erspan_opts_dump(skb, info);
if (err)
goto err_out;
@@ -741,7 +741,7 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
struct ip_tunnel_key *key = &info->key;
__be32 key_id = tunnel_id_to_key32(key->tun_id);
- if (((key->tun_flags & TUNNEL_KEY) &&
+ if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) &&
nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
tunnel_key_dump_addresses(skb,
&params->tcft_enc_metadata->u.tun_info) ||
@@ -749,8 +749,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
key->tp_dst)) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
- !(key->tun_flags & TUNNEL_CSUM)) ||
- ((key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) ||
+ (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) &&
nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) ||
tunnel_key_opts_dump(skb, info))
goto nla_put_failure;
@@ -842,6 +842,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
.offload_act_setup = tcf_tunnel_key_offload_act_setup,
.size = sizeof(struct tcf_tunnel_key),
};
+MODULE_ALIAS_NET_ACT("tunnel_key");
static __net_init int tunnel_key_init_net(struct net *net)
{
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 836183011a..22f4b1e8ad 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -427,6 +427,7 @@ static struct tc_action_ops act_vlan_ops = {
.offload_act_setup = tcf_vlan_offload_act_setup,
.size = sizeof(struct tcf_vlan),
};
+MODULE_ALIAS_NET_ACT("vlan");
static __net_init int vlan_init_net(struct net *net)
{
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ff3d396a65..17d97bbe89 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -257,7 +257,7 @@ tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
#ifdef CONFIG_MODULES
if (rtnl_held)
rtnl_unlock();
- request_module("cls_%s", kind);
+ request_module(NET_CLS_ALIAS_PREFIX "%s", kind);
if (rtnl_held)
rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
@@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp)
refcount_inc(&tp->refcnt);
}
+static void tcf_maintain_bypass(struct tcf_block *block)
+{
+ int filtercnt = atomic_read(&block->filtercnt);
+ int skipswcnt = atomic_read(&block->skipswcnt);
+ bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
+
+ if (bypass_wanted != block->bypass_wanted) {
+#ifdef CONFIG_NET_CLS_ACT
+ if (bypass_wanted)
+ static_branch_inc(&tcf_bypass_check_needed_key);
+ else
+ static_branch_dec(&tcf_bypass_check_needed_key);
+#endif
+ block->bypass_wanted = bypass_wanted;
+ }
+}
+
+static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
+{
+ lockdep_assert_not_held(&block->cb_lock);
+
+ down_write(&block->cb_lock);
+ if (*counted != add) {
+ if (add) {
+ atomic_inc(&block->filtercnt);
+ *counted = true;
+ } else {
+ atomic_dec(&block->filtercnt);
+ *counted = false;
+ }
+ }
+ tcf_maintain_bypass(block);
+ up_write(&block->cb_lock);
+}
+
static void tcf_chain_put(struct tcf_chain *chain);
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
+ tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
if (sig_destroy)
tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
@@ -2367,6 +2403,7 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
+ tcf_block_filter_cnt_update(block, &tp->counted, true);
/* q pointer is NULL for shared blocks */
if (q)
q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
if (*flags & TCA_CLS_FLAGS_IN_HW)
return;
*flags |= TCA_CLS_FLAGS_IN_HW;
+ if (tc_skip_sw(*flags))
+ atomic_inc(&block->skipswcnt);
atomic_inc(&block->offloadcnt);
}
@@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return;
*flags &= ~TCA_CLS_FLAGS_IN_HW;
+ if (tc_skip_sw(*flags))
+ atomic_dec(&block->skipswcnt);
atomic_dec(&block->offloadcnt);
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index a1f5693133..ecfaa4f9a0 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -328,6 +328,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.bind_class = basic_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("basic");
static int __init init_basic(void)
{
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 382c7a71f8..5e83e890f6 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -693,6 +693,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.dump = cls_bpf_dump,
.bind_class = cls_bpf_bind_class,
};
+MODULE_ALIAS_NET_CLS("bpf");
static int __init cls_bpf_init_mod(void)
{
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7ee8dbf49e..424252982d 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -209,6 +209,7 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
.dump = cls_cgroup_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("cgroup");
static int __init init_cgroup_cls(void)
{
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6ab317b48d..5502998aac 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -702,6 +702,7 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = {
.walk = flow_walk,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("flow");
static int __init cls_flow_init(void)
{
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6ee7064c82..fd9a6f20b6 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -28,6 +28,7 @@
#include <net/vxlan.h>
#include <net/erspan.h>
#include <net/gtp.h>
+#include <net/pfcp.h>
#include <net/tc_wrapper.h>
#include <net/dst.h>
@@ -741,6 +742,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_PFCP] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -771,6 +773,12 @@ gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = {
};
static const struct nla_policy
+pfcp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy
mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 },
@@ -1419,6 +1427,44 @@ static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key,
return sizeof(*sinfo);
}
+static int fl_set_pfcp_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX + 1];
+ struct pfcp_metadata *md;
+ int err;
+
+ md = (struct pfcp_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_PFCP) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-pfcp option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_PFCP_MAX, nla,
+ pfcp_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing tunnel key pfcp option type");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE])
+ md->type = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE]);
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID])
+ md->seid = nla_get_be64(tb[TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID]);
+
+ return sizeof(*md);
+}
+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1454,12 +1500,13 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
switch (nla_type(nla_opt_key)) {
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
if (key->enc_opts.dst_opt_type &&
- key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) {
+ key->enc_opts.dst_opt_type !=
+ IP_TUNNEL_GENEVE_OPT_BIT) {
NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT;
option_len = fl_set_geneve_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1470,7 +1517,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT;
option_len = fl_set_geneve_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1489,7 +1536,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT;
option_len = fl_set_vxlan_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1500,7 +1547,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT;
option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1519,7 +1566,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT;
option_len = fl_set_erspan_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1530,7 +1577,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT;
option_len = fl_set_erspan_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1550,7 +1597,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
option_len = 0;
- key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT;
option_len = fl_set_gtp_opt(nla_opt_key, key,
key_depth, option_len,
extack);
@@ -1561,7 +1608,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
/* At the same time we need to parse through the mask
* in order to verify exact and mask attribute lengths.
*/
- mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT;
option_len = fl_set_gtp_opt(nla_opt_msk, mask,
msk_depth, option_len,
extack);
@@ -1575,6 +1622,36 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
break;
+ case TCA_FLOWER_KEY_ENC_OPTS_PFCP:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Duplicate type for pfcp options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT;
+ option_len = fl_set_pfcp_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = IP_TUNNEL_PFCP_OPT_BIT;
+ option_len = fl_set_pfcp_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG_MOD(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+ break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
return -EINVAL;
@@ -3117,6 +3194,32 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fl_dump_key_pfcp_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct pfcp_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_PFCP);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct pfcp_metadata *)&enc_opts->data[0];
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_TYPE, md->type))
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, TCA_FLOWER_KEY_ENC_OPT_PFCP_SEID,
+ md->seid, 0))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int fl_dump_key_ct(struct sk_buff *skb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask)
@@ -3202,26 +3305,31 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
goto nla_put_failure;
switch (enc_opts->dst_opt_type) {
- case TUNNEL_GENEVE_OPT:
+ case IP_TUNNEL_GENEVE_OPT_BIT:
err = fl_dump_key_geneve_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_VXLAN_OPT:
+ case IP_TUNNEL_VXLAN_OPT_BIT:
err = fl_dump_key_vxlan_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_ERSPAN_OPT:
+ case IP_TUNNEL_ERSPAN_OPT_BIT:
err = fl_dump_key_erspan_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
- case TUNNEL_GTP_OPT:
+ case IP_TUNNEL_GTP_OPT_BIT:
err = fl_dump_key_gtp_opt(skb, enc_opts);
if (err)
goto nla_put_failure;
break;
+ case IP_TUNNEL_PFCP_OPT_BIT:
+ err = fl_dump_key_pfcp_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
default:
goto nla_put_failure;
}
@@ -3659,6 +3767,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.owner = THIS_MODULE,
.flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
+MODULE_ALIAS_NET_CLS("flower");
static int __init cls_fl_init(void)
{
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index afc534ee0a..cdddc86952 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -433,6 +433,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("fw");
static int __init init_fw(void)
{
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index c4ed11df62..9f1e62ca50 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -398,6 +398,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("matchall");
static int __init cls_mall_init(void)
{
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 12a505db41..b9c58c040c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -671,6 +671,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.bind_class = route4_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("route");
static int __init init_route4(void)
{
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 289e1755c2..9412d88a99 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1453,6 +1453,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.bind_class = u32_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("u32");
static int __init init_u32(void)
{
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 31e38a614f..74afc21052 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -228,7 +228,7 @@ int qdisc_set_default(const char *name)
if (!ops) {
/* Not found, drop lock and try to load module */
write_unlock(&qdisc_mod_lock);
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
write_lock(&qdisc_mod_lock);
ops = qdisc_lookup_default(name);
@@ -1275,7 +1275,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* go away in the mean time.
*/
rtnl_unlock();
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
rtnl_lock();
ops = qdisc_lookup_ops(kind);
if (ops != NULL) {
@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* before again attaching a qdisc.
*/
if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
- dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
@@ -1389,6 +1389,7 @@ err_out4:
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ lockdep_unregister_key(&sch->root_lock_key);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
@@ -2410,7 +2411,7 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
-#if IS_ENABLED(CONFIG_RETPOLINE)
+#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 9cff995586..9602dafe32 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1512,7 +1512,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
if (!q->overflow_timeout) {
int i;
/* Build fresh max-heap */
- for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+ for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2 - 1; i >= 0; i--)
cake_heapify(q, i);
}
q->overflow_timeout = 65535;
@@ -2572,6 +2572,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
{
struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CAKE_MAX + 1];
+ u16 rate_flags;
+ u8 flow_mode;
int err;
err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2579,10 +2581,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
+ flow_mode = q->flow_mode;
if (tb[TCA_CAKE_NAT]) {
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
- q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+ flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+ flow_mode |= CAKE_FLOW_NAT_FLAG *
!!nla_get_u32(tb[TCA_CAKE_NAT]);
#else
NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
@@ -2592,29 +2595,34 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_BASE_RATE64])
- q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+ WRITE_ONCE(q->rate_bps,
+ nla_get_u64(tb[TCA_CAKE_BASE_RATE64]));
if (tb[TCA_CAKE_DIFFSERV_MODE])
- q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+ WRITE_ONCE(q->tin_mode,
+ nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]));
+ rate_flags = q->rate_flags;
if (tb[TCA_CAKE_WASH]) {
if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
- q->rate_flags |= CAKE_FLAG_WASH;
+ rate_flags |= CAKE_FLAG_WASH;
else
- q->rate_flags &= ~CAKE_FLAG_WASH;
+ rate_flags &= ~CAKE_FLAG_WASH;
}
if (tb[TCA_CAKE_FLOW_MODE])
- q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+ flow_mode = ((flow_mode & CAKE_FLOW_NAT_FLAG) |
(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
CAKE_FLOW_MASK));
if (tb[TCA_CAKE_ATM])
- q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+ WRITE_ONCE(q->atm_mode,
+ nla_get_u32(tb[TCA_CAKE_ATM]));
if (tb[TCA_CAKE_OVERHEAD]) {
- q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
- q->rate_flags |= CAKE_FLAG_OVERHEAD;
+ WRITE_ONCE(q->rate_overhead,
+ nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
+ rate_flags |= CAKE_FLAG_OVERHEAD;
q->max_netlen = 0;
q->max_adjlen = 0;
@@ -2623,7 +2631,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_RAW]) {
- q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+ rate_flags &= ~CAKE_FLAG_OVERHEAD;
q->max_netlen = 0;
q->max_adjlen = 0;
@@ -2632,54 +2640,58 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_MPU])
- q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+ WRITE_ONCE(q->rate_mpu,
+ nla_get_u32(tb[TCA_CAKE_MPU]));
if (tb[TCA_CAKE_RTT]) {
- q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+ u32 interval = nla_get_u32(tb[TCA_CAKE_RTT]);
- if (!q->interval)
- q->interval = 1;
+ WRITE_ONCE(q->interval, max(interval, 1U));
}
if (tb[TCA_CAKE_TARGET]) {
- q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+ u32 target = nla_get_u32(tb[TCA_CAKE_TARGET]);
- if (!q->target)
- q->target = 1;
+ WRITE_ONCE(q->target, max(target, 1U));
}
if (tb[TCA_CAKE_AUTORATE]) {
if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
- q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+ rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
else
- q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+ rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
}
if (tb[TCA_CAKE_INGRESS]) {
if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
- q->rate_flags |= CAKE_FLAG_INGRESS;
+ rate_flags |= CAKE_FLAG_INGRESS;
else
- q->rate_flags &= ~CAKE_FLAG_INGRESS;
+ rate_flags &= ~CAKE_FLAG_INGRESS;
}
if (tb[TCA_CAKE_ACK_FILTER])
- q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+ WRITE_ONCE(q->ack_filter,
+ nla_get_u32(tb[TCA_CAKE_ACK_FILTER]));
if (tb[TCA_CAKE_MEMORY])
- q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+ WRITE_ONCE(q->buffer_config_limit,
+ nla_get_u32(tb[TCA_CAKE_MEMORY]));
if (tb[TCA_CAKE_SPLIT_GSO]) {
if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
- q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+ rate_flags |= CAKE_FLAG_SPLIT_GSO;
else
- q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+ rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
}
if (tb[TCA_CAKE_FWMARK]) {
- q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
- q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+ WRITE_ONCE(q->fwmark_mask, nla_get_u32(tb[TCA_CAKE_FWMARK]));
+ WRITE_ONCE(q->fwmark_shft,
+ q->fwmark_mask ? __ffs(q->fwmark_mask) : 0);
}
+ WRITE_ONCE(q->rate_flags, rate_flags);
+ WRITE_ONCE(q->flow_mode, flow_mode);
if (q->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2774,68 +2786,72 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *opts;
+ u16 rate_flags;
+ u8 flow_mode;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!opts)
goto nla_put_failure;
- if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
- TCA_CAKE_PAD))
+ if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
+ READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
- q->flow_mode & CAKE_FLOW_MASK))
+ flow_mode = READ_ONCE(q->flow_mode);
+ if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, flow_mode & CAKE_FLOW_MASK))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+ if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+ if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+ if (nla_put_u32(skb, TCA_CAKE_MEMORY,
+ READ_ONCE(q->buffer_config_limit)))
goto nla_put_failure;
+ rate_flags = READ_ONCE(q->rate_flags);
if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
- !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+ !!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_INGRESS,
- !!(q->rate_flags & CAKE_FLAG_INGRESS)))
+ !!(rate_flags & CAKE_FLAG_INGRESS)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+ if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_NAT,
- !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+ !!(flow_mode & CAKE_FLOW_NAT_FLAG)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+ if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_WASH,
- !!(q->rate_flags & CAKE_FLAG_WASH)))
+ !!(rate_flags & CAKE_FLAG_WASH)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+ if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, READ_ONCE(q->rate_overhead)))
goto nla_put_failure;
- if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+ if (!(rate_flags & CAKE_FLAG_OVERHEAD))
if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+ if (nla_put_u32(skb, TCA_CAKE_ATM, READ_ONCE(q->atm_mode)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+ if (nla_put_u32(skb, TCA_CAKE_MPU, READ_ONCE(q->rate_mpu)))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
- !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+ !!(rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, READ_ONCE(q->fwmark_mask)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -3103,6 +3119,7 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
.dump_stats = cake_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cake");
static int __init cake_module_init(void)
{
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index beece8e82c..939425da18 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
}
/* Everything went OK, save the parameters used. */
- q->hicredit = qopt->hicredit;
- q->locredit = qopt->locredit;
- q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
- q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
- q->offload = qopt->offload;
+ WRITE_ONCE(q->hicredit, qopt->hicredit);
+ WRITE_ONCE(q->locredit, qopt->locredit);
+ WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT);
+ WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT);
+ WRITE_ONCE(q->offload, qopt->offload);
return 0;
}
@@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!nest)
goto nla_put_failure;
- opt.hicredit = q->hicredit;
- opt.locredit = q->locredit;
- opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
- opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
- opt.offload = q->offload;
+ opt.hicredit = READ_ONCE(q->hicredit);
+ opt.locredit = READ_ONCE(q->locredit);
+ opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT);
+ opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT);
+ opt.offload = READ_ONCE(q->offload);
if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -546,6 +546,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.dump = cbs_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cbs");
static struct notifier_block cbs_device_notifier = {
.notifier_call = cbs_dev_notifier,
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ae1da08e26..9107201092 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -405,8 +405,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
} else
sch_tree_lock(sch);
- q->flags = ctl->flags;
- q->limit = ctl->limit;
+ WRITE_ONCE(q->flags, ctl->flags);
+ WRITE_ONCE(q->limit, ctl->limit);
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
@@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt,
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct choke_sched_data *q = qdisc_priv(sch);
+ u8 Wlog = READ_ONCE(q->parms.Wlog);
struct nlattr *opts = NULL;
struct tc_red_qopt opt = {
- .limit = q->limit,
- .flags = q->flags,
- .qth_min = q->parms.qth_min >> q->parms.Wlog,
- .qth_max = q->parms.qth_max >> q->parms.Wlog,
- .Wlog = q->parms.Wlog,
- .Plog = q->parms.Plog,
- .Scell_log = q->parms.Scell_log,
+ .limit = READ_ONCE(q->limit),
+ .flags = READ_ONCE(q->flags),
+ .qth_min = READ_ONCE(q->parms.qth_min) >> Wlog,
+ .qth_max = READ_ONCE(q->parms.qth_max) >> Wlog,
+ .Wlog = Wlog,
+ .Plog = READ_ONCE(q->parms.Plog),
+ .Scell_log = READ_ONCE(q->parms.Scell_log),
};
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -447,7 +448,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
- nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
+ nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -498,6 +499,7 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
.dump_stats = choke_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("choke");
static int __init choke_module_init(void)
{
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index d7a4874543..3e8d4fe4d9 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Codel - The Controlled-Delay Active Queue Management algorithm
*
@@ -7,37 +8,6 @@
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
* Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The names of the authors may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, provided that this notice is retained in full, this
- * software may be distributed under the terms of the GNU General
- * Public License ("GPL") version 2, in which case the provisions of the
- * GPL apply INSTEAD OF those given above.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
*/
#include <linux/module.h>
@@ -148,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_CODEL_TARGET]) {
u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
- q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.target,
+ ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_CE_THRESHOLD]) {
u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
- q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.ce_threshold,
+ (val * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_INTERVAL]) {
u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
- q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->params.interval,
+ ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_CODEL_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_CODEL_LIMIT]));
if (tb[TCA_CODEL_ECN])
- q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+ WRITE_ONCE(q->params.ecn,
+ !!nla_get_u32(tb[TCA_CODEL_ECN]));
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
@@ -213,6 +188,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt,
static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct codel_sched_data *q = qdisc_priv(sch);
+ codel_time_t ce_threshold;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -220,17 +196,18 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_CODEL_TARGET,
- codel_time_to_us(q->params.target)) ||
+ codel_time_to_us(READ_ONCE(q->params.target))) ||
nla_put_u32(skb, TCA_CODEL_LIMIT,
- sch->limit) ||
+ READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_CODEL_INTERVAL,
- codel_time_to_us(q->params.interval)) ||
+ codel_time_to_us(READ_ONCE(q->params.interval))) ||
nla_put_u32(skb, TCA_CODEL_ECN,
- q->params.ecn))
+ READ_ONCE(q->params.ecn)))
goto nla_put_failure;
- if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ ce_threshold = READ_ONCE(q->params.ce_threshold);
+ if (ce_threshold != CODEL_DISABLED_THRESHOLD &&
nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
- codel_time_to_us(q->params.ce_threshold)))
+ codel_time_to_us(ce_threshold)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -287,6 +264,7 @@ static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
.dump_stats = codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("codel");
static int __init codel_module_init(void)
{
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 097740a9af..c69b999fae 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -481,6 +481,7 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
.destroy = drr_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("drr");
static int __init drr_init(void)
{
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 4808159a54..c74d778c32 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -467,15 +467,15 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!nest)
goto nla_put_failure;
- opt.delta = q->delta;
- opt.clockid = q->clockid;
- if (q->offload)
+ opt.delta = READ_ONCE(q->delta);
+ opt.clockid = READ_ONCE(q->clockid);
+ if (READ_ONCE(q->offload))
opt.flags |= TC_ETF_OFFLOAD_ON;
- if (q->deadline_mode)
+ if (READ_ONCE(q->deadline_mode))
opt.flags |= TC_ETF_DEADLINE_MODE_ON;
- if (q->skip_sock_check)
+ if (READ_ONCE(q->skip_sock_check))
opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
@@ -500,6 +500,7 @@ static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
.dump = etf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("etf");
static int __init etf_module_init(void)
{
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index f7c8849594..f80bc05d4c 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -646,7 +646,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
- q->nbands = nbands;
+ WRITE_ONCE(q->nbands, nbands);
for (i = nstrict; i < q->nstrict; i++) {
if (q->classes[i].qdisc->q.qlen) {
list_add_tail(&q->classes[i].alist, &q->active);
@@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
list_del(&q->classes[i].alist);
qdisc_tree_flush_backlog(q->classes[i].qdisc);
}
- q->nstrict = nstrict;
+ WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
for (i = 0; i < q->nbands; i++)
- q->classes[i].quantum = quanta[i];
+ WRITE_ONCE(q->classes[i].quantum, quanta[i]);
for (i = oldbands; i < q->nbands; i++) {
q->classes[i].qdisc = queues[i];
@@ -676,7 +676,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
for (i = q->nbands; i < oldbands; i++) {
qdisc_put(q->classes[i].qdisc);
q->classes[i].qdisc = NULL;
- q->classes[i].quantum = 0;
+ WRITE_ONCE(q->classes[i].quantum, 0);
q->classes[i].deficit = 0;
gnet_stats_basic_sync_init(&q->classes[i].bstats);
memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
@@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
struct ets_sched *q = qdisc_priv(sch);
struct nlattr *opts;
struct nlattr *nest;
+ u8 nbands, nstrict;
int band;
int prio;
int err;
@@ -745,21 +746,22 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
if (!opts)
goto nla_err;
- if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+ nbands = READ_ONCE(q->nbands);
+ if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
goto nla_err;
- if (q->nstrict &&
- nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+ nstrict = READ_ONCE(q->nstrict);
+ if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
goto nla_err;
- if (q->nbands > q->nstrict) {
+ if (nbands > nstrict) {
nest = nla_nest_start(skb, TCA_ETS_QUANTA);
if (!nest)
goto nla_err;
- for (band = q->nstrict; band < q->nbands; band++) {
+ for (band = nstrict; band < nbands; band++) {
if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
- q->classes[band].quantum))
+ READ_ONCE(q->classes[band].quantum)))
goto nla_err;
}
@@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_err;
for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
- if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+ if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
+ READ_ONCE(q->prio2band[prio])))
goto nla_err;
}
@@ -812,6 +815,7 @@ static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
.dump = ets_qdisc_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ets");
static int __init ets_init(void)
{
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 450f5c67ac..b50b2c2cc0 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,7 +19,8 @@
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+ READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
@@ -28,7 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- if (likely(sch->q.qlen < sch->limit))
+ if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
@@ -39,7 +40,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
unsigned int prev_backlog;
- if (likely(sch->q.qlen < sch->limit))
+ if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog;
@@ -105,14 +106,14 @@ static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
- sch->limit = limit;
+ WRITE_ONCE(sch->limit, limit);
} else {
struct tc_fifo_qopt *ctl = nla_data(opt);
if (nla_len(opt) < sizeof(*ctl))
return -EINVAL;
- sch->limit = ctl->limit;
+ WRITE_ONCE(sch->limit, ctl->limit);
}
if (is_bfifo)
@@ -154,7 +155,7 @@ static void fifo_destroy(struct Qdisc *sch)
static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- struct tc_fifo_qopt opt = { .limit = sch->limit };
+ struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) };
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 3a31c47fea..2389747256 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -106,6 +106,8 @@ struct fq_perband_flows {
int quantum; /* based on band nr : 576KB, 192KB, 64KB */
};
+#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
+
struct fq_sched_data {
/* Read mostly cache line */
@@ -122,7 +124,7 @@ struct fq_sched_data {
u8 rate_enable;
u8 fq_trees_log;
u8 horizon_drop;
- u8 prio2band[(TC_PRIO_MAX + 1) >> 2];
+ u8 prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
u32 timer_slack; /* hrtimer slack in ns */
/* Read/Write fields. */
@@ -159,7 +161,7 @@ struct fq_sched_data {
/* return the i-th 2-bit value ("crumb") */
static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
{
- return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
+ return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
}
/*
@@ -888,7 +890,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
q->fq_root = array;
- q->fq_trees_log = log;
+ WRITE_ONCE(q->fq_trees_log, log);
sch_tree_unlock(sch);
@@ -927,11 +929,15 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
{
const int num_elems = TC_PRIO_MAX + 1;
+ u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE];
int i;
- memset(out, 0, num_elems / 4);
+ memset(tmp, 0, sizeof(tmp));
for (i = 0; i < num_elems; i++)
- out[i / 4] |= in[i] << (2 * (i & 0x3));
+ tmp[i / 4] |= in[i] << (2 * (i & 0x3));
+
+ for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++)
+ WRITE_ONCE(out[i], tmp[i]);
}
static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
@@ -958,7 +964,7 @@ static int fq_load_weights(struct fq_sched_data *q,
}
}
for (i = 0; i < FQ_BANDS; i++)
- q->band_flows[i].quantum = weights[i];
+ WRITE_ONCE(q->band_flows[i].quantum, weights[i]);
return 0;
}
@@ -1011,16 +1017,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
err = -EINVAL;
}
if (tb[TCA_FQ_PLIMIT])
- sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_FQ_PLIMIT]));
if (tb[TCA_FQ_FLOW_PLIMIT])
- q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+ WRITE_ONCE(q->flow_plimit,
+ nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]));
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
if (quantum > 0 && quantum <= (1 << 20)) {
- q->quantum = quantum;
+ WRITE_ONCE(q->quantum, quantum);
} else {
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
@@ -1028,7 +1036,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
- q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+ WRITE_ONCE(q->initial_quantum,
+ nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]));
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
@@ -1037,17 +1046,19 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_FLOW_MAX_RATE]) {
u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
- q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+ WRITE_ONCE(q->flow_max_rate,
+ (rate == ~0U) ? ~0UL : rate);
}
if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
- q->low_rate_threshold =
- nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+ WRITE_ONCE(q->low_rate_threshold,
+ nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]));
if (tb[TCA_FQ_RATE_ENABLE]) {
u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
if (enable <= 1)
- q->rate_enable = enable;
+ WRITE_ONCE(q->rate_enable,
+ enable);
else
err = -EINVAL;
}
@@ -1055,7 +1066,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
- q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+ WRITE_ONCE(q->flow_refill_delay,
+ usecs_to_jiffies(usecs_delay));
}
if (!err && tb[TCA_FQ_PRIOMAP])
@@ -1065,21 +1077,26 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack);
if (tb[TCA_FQ_ORPHAN_MASK])
- q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+ WRITE_ONCE(q->orphan_mask,
+ nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]));
if (tb[TCA_FQ_CE_THRESHOLD])
- q->ce_threshold = (u64)NSEC_PER_USEC *
- nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+ WRITE_ONCE(q->ce_threshold,
+ (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]));
if (tb[TCA_FQ_TIMER_SLACK])
- q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+ WRITE_ONCE(q->timer_slack,
+ nla_get_u32(tb[TCA_FQ_TIMER_SLACK]));
if (tb[TCA_FQ_HORIZON])
- q->horizon = (u64)NSEC_PER_USEC *
- nla_get_u32(tb[TCA_FQ_HORIZON]);
+ WRITE_ONCE(q->horizon,
+ (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_HORIZON]));
if (tb[TCA_FQ_HORIZON_DROP])
- q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+ WRITE_ONCE(q->horizon_drop,
+ nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
if (!err) {
@@ -1160,13 +1177,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 ce_threshold = q->ce_threshold;
struct tc_prio_qopt prio = {
.bands = FQ_BANDS,
};
- u64 horizon = q->horizon;
struct nlattr *opts;
+ u64 ce_threshold;
s32 weights[3];
+ u64 horizon;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (opts == NULL)
@@ -1174,35 +1191,48 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+ ce_threshold = READ_ONCE(q->ce_threshold);
do_div(ce_threshold, NSEC_PER_USEC);
+
+ horizon = READ_ONCE(q->horizon);
do_div(horizon, NSEC_PER_USEC);
- if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
- nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
- nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+ if (nla_put_u32(skb, TCA_FQ_PLIMIT,
+ READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
+ READ_ONCE(q->flow_plimit)) ||
+ nla_put_u32(skb, TCA_FQ_QUANTUM,
+ READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM,
+ READ_ONCE(q->initial_quantum)) ||
+ nla_put_u32(skb, TCA_FQ_RATE_ENABLE,
+ READ_ONCE(q->rate_enable)) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
- min_t(unsigned long, q->flow_max_rate, ~0U)) ||
+ min_t(unsigned long,
+ READ_ONCE(q->flow_max_rate), ~0U)) ||
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
- jiffies_to_usecs(q->flow_refill_delay)) ||
- nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+ jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) ||
+ nla_put_u32(skb, TCA_FQ_ORPHAN_MASK,
+ READ_ONCE(q->orphan_mask)) ||
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
- q->low_rate_threshold) ||
+ READ_ONCE(q->low_rate_threshold)) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
- nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
- nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+ nla_put_u32(skb, TCA_FQ_BUCKETS_LOG,
+ READ_ONCE(q->fq_trees_log)) ||
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
+ READ_ONCE(q->timer_slack)) ||
nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
- nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
+ nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
+ READ_ONCE(q->horizon_drop)))
goto nla_put_failure;
fq_prio2band_decompress_crumb(q->prio2band, prio.priomap);
if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio))
goto nla_put_failure;
- weights[0] = q->band_flows[0].quantum;
- weights[1] = q->band_flows[1].quantum;
- weights[2] = q->band_flows[2].quantum;
+ weights[0] = READ_ONCE(q->band_flows[0].quantum);
+ weights[1] = READ_ONCE(q->band_flows[1].quantum);
+ weights[2] = READ_ONCE(q->band_flows[2].quantum);
if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights))
goto nla_put_failure;
@@ -1264,6 +1294,7 @@ static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
.dump_stats = fq_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq");
static int __init fq_module_init(void)
{
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 8c4fee0634..4f908c11ba 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -396,40 +396,49 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_CODEL_TARGET]) {
u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
- q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.target,
+ (target * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
- q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.ce_threshold,
+ (val * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
- q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
+ WRITE_ONCE(q->cparams.ce_threshold_selector,
+ nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]));
if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
- q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
+ WRITE_ONCE(q->cparams.ce_threshold_mask,
+ nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]));
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
- q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+ WRITE_ONCE(q->cparams.interval,
+ (interval * NSEC_PER_USEC) >> CODEL_SHIFT);
}
if (tb[TCA_FQ_CODEL_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+ WRITE_ONCE(sch->limit,
+ nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]));
if (tb[TCA_FQ_CODEL_ECN])
- q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+ WRITE_ONCE(q->cparams.ecn,
+ !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]));
if (quantum)
- q->quantum = quantum;
+ WRITE_ONCE(q->quantum, quantum);
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ WRITE_ONCE(q->drop_batch_size,
+ max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
- q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+ WRITE_ONCE(q->memory_limit,
+ min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])));
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
@@ -522,6 +531,7 @@ init_failure:
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ codel_time_t ce_threshold;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
- codel_time_to_us(q->cparams.target)) ||
+ codel_time_to_us(READ_ONCE(q->cparams.target))) ||
nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
- sch->limit) ||
+ READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
- codel_time_to_us(q->cparams.interval)) ||
+ codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
nla_put_u32(skb, TCA_FQ_CODEL_ECN,
- q->cparams.ecn) ||
+ READ_ONCE(q->cparams.ecn)) ||
nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
- q->quantum) ||
+ READ_ONCE(q->quantum)) ||
nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
- q->drop_batch_size) ||
+ READ_ONCE(q->drop_batch_size)) ||
nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
- q->memory_limit) ||
+ READ_ONCE(q->memory_limit)) ||
nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
- q->flows_cnt))
+ READ_ONCE(q->flows_cnt)))
goto nla_put_failure;
- if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
+ ce_threshold = READ_ONCE(q->cparams.ce_threshold);
+ if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
- codel_time_to_us(q->cparams.ce_threshold)))
+ codel_time_to_us(ce_threshold)))
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+ READ_ONCE(q->cparams.ce_threshold_selector)))
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
+ if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
+ READ_ONCE(q->cparams.ce_threshold_mask)))
goto nla_put_failure;
}
@@ -717,6 +730,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq_codel");
static int __init fq_codel_module_init(void)
{
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 5b595773e5..c38f33ff80 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -10,6 +10,7 @@
*/
#include <linux/jhash.h>
+#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>
#include <net/pkt_cls.h>
@@ -298,8 +299,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
- q->p_params.limit = limit;
- sch->limit = limit;
+ WRITE_ONCE(q->p_params.limit, limit);
+ WRITE_ONCE(sch->limit, limit);
}
if (tb[TCA_FQ_PIE_FLOWS]) {
if (q->flows) {
@@ -321,39 +322,45 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
/* convert to pschedtime */
- q->p_params.target =
- PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ WRITE_ONCE(q->p_params.target,
+ PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
}
/* tupdate is in jiffies */
if (tb[TCA_FQ_PIE_TUPDATE])
- q->p_params.tupdate =
- usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
+ WRITE_ONCE(q->p_params.tupdate,
+ usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
if (tb[TCA_FQ_PIE_ALPHA])
- q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
+ WRITE_ONCE(q->p_params.alpha,
+ nla_get_u32(tb[TCA_FQ_PIE_ALPHA]));
if (tb[TCA_FQ_PIE_BETA])
- q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
+ WRITE_ONCE(q->p_params.beta,
+ nla_get_u32(tb[TCA_FQ_PIE_BETA]));
if (tb[TCA_FQ_PIE_QUANTUM])
- q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
+ WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]));
if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
- q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
+ WRITE_ONCE(q->memory_limit,
+ nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]));
if (tb[TCA_FQ_PIE_ECN_PROB])
- q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
+ WRITE_ONCE(q->ecn_prob,
+ nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]));
if (tb[TCA_FQ_PIE_ECN])
- q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
+ WRITE_ONCE(q->p_params.ecn,
+ nla_get_u32(tb[TCA_FQ_PIE_ECN]));
if (tb[TCA_FQ_PIE_BYTEMODE])
- q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
+ WRITE_ONCE(q->p_params.bytemode,
+ nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]));
if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
- q->p_params.dq_rate_estimator =
- nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
+ WRITE_ONCE(q->p_params.dq_rate_estimator,
+ nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]));
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
@@ -470,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
return -EMSGSIZE;
/* convert target from pschedtime to us */
- if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
+ if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) ||
nla_put_u32(skb, TCA_FQ_PIE_TARGET,
- ((u32)PSCHED_TICKS2NS(q->p_params.target)) /
+ ((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) /
NSEC_PER_USEC) ||
nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
- jiffies_to_usecs(q->p_params.tupdate)) ||
- nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
- nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
- nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
- nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
- nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
- nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
+ jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT,
+ READ_ONCE(q->memory_limit)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) ||
nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
- q->p_params.dq_rate_estimator))
+ READ_ONCE(q->p_params.dq_rate_estimator)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -563,6 +571,7 @@ static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
.dump_stats = fq_pie_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq_pie");
static int __init fq_pie_module_init(void)
{
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a498b5d7c5..e22ff003d5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -27,6 +27,7 @@
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/hotdata.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>
@@ -409,7 +410,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = READ_ONCE(dev_tx_weight);
+ int quota = READ_ONCE(net_hotdata.dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
@@ -505,19 +506,22 @@ static void dev_watchdog(struct timer_list *t)
unsigned int timedout_ms = 0;
unsigned int i;
unsigned long trans_start;
+ unsigned long oldest_start = jiffies;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
trans_start = READ_ONCE(txq->trans_start);
- if (netif_xmit_stopped(txq) &&
- time_after(jiffies, (trans_start +
- dev->watchdog_timeo))) {
+ if (!netif_xmit_stopped(txq))
+ continue;
+ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
break;
}
+ if (time_after(oldest_start, trans_start))
+ oldest_start = trans_start;
}
if (unlikely(timedout_ms)) {
@@ -530,7 +534,7 @@ static void dev_watchdog(struct timer_list *t)
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies +
+ round_jiffies(oldest_start +
dev->watchdog_timeo)))
release = false;
}
@@ -672,6 +676,7 @@ struct Qdisc noop_qdisc = {
.qlen = 0,
.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
},
+ .owner = -1,
};
EXPORT_SYMBOL(noop_qdisc);
@@ -944,7 +949,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
+ lockdep_register_key(&sch->root_lock_key);
spin_lock_init(&sch->q.lock);
+ lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
@@ -979,6 +986,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
+ lockdep_unregister_key(&sch->root_lock_key);
kfree(sch);
errout:
return ERR_PTR(err);
@@ -1067,6 +1075,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
if (ops->destroy)
ops->destroy(qdisc);
+ lockdep_unregister_key(&qdisc->root_lock_key);
module_put(ops->owner);
netdev_put(dev, &qdisc->dev_tracker);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8c61eb3dc9..79ba9dc702 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -930,6 +930,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.dump = gred_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("gred");
static int __init gred_module_init(void)
{
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 16c45da403..c287bf8423 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1174,7 +1174,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
/* classification failed, try default class */
- cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+ cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle),
+ READ_ONCE(q->defcls)), sch);
if (cl == NULL || cl->level > 0)
return NULL;
@@ -1443,9 +1444,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
qopt = nla_data(opt);
- sch_tree_lock(sch);
- q->defcls = qopt->defcls;
- sch_tree_unlock(sch);
+ WRITE_ONCE(q->defcls, qopt->defcls);
return 0;
}
@@ -1525,7 +1524,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
- qopt.defcls = q->defcls;
+ qopt.defcls = READ_ONCE(q->defcls);
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
goto nla_put_failure;
return skb->len;
@@ -1679,6 +1678,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct hfsc_sched),
.owner = THIS_MODULE
};
+MODULE_ALIAS_NET_SCH("hfsc");
static int __init
hfsc_init(void)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index d26cd436cb..44d9efe1a9 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -534,27 +534,31 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (tb[TCA_HHF_BACKLOG_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+ WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]));
- q->quantum = new_quantum;
- q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+ WRITE_ONCE(q->quantum, new_quantum);
+ WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight);
if (tb[TCA_HHF_HH_FLOWS_LIMIT])
- q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+ WRITE_ONCE(q->hh_flows_limit,
+ nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]));
if (tb[TCA_HHF_RESET_TIMEOUT]) {
u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
- q->hhf_reset_timeout = usecs_to_jiffies(us);
+ WRITE_ONCE(q->hhf_reset_timeout,
+ usecs_to_jiffies(us));
}
if (tb[TCA_HHF_ADMIT_BYTES])
- q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+ WRITE_ONCE(q->hhf_admit_bytes,
+ nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]));
if (tb[TCA_HHF_EVICT_TIMEOUT]) {
u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
- q->hhf_evict_timeout = usecs_to_jiffies(us);
+ WRITE_ONCE(q->hhf_evict_timeout,
+ usecs_to_jiffies(us));
}
qlen = sch->q.qlen;
@@ -657,15 +661,18 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+ if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) ||
+ nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT,
+ READ_ONCE(q->hh_flows_limit)) ||
nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
- jiffies_to_usecs(q->hhf_reset_timeout)) ||
- nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+ jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) ||
+ nla_put_u32(skb, TCA_HHF_ADMIT_BYTES,
+ READ_ONCE(q->hhf_admit_bytes)) ||
nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
- jiffies_to_usecs(q->hhf_evict_timeout)) ||
- nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+ jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) ||
+ nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT,
+ READ_ONCE(q->hhf_non_hh_weight)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -702,6 +709,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.dump_stats = hhf_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("hhf");
static int __init hhf_module_init(void)
{
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7349233eaa..ff3de37874 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}
-static void htb_set_lockdep_class_child(struct Qdisc *q)
-{
- static struct lock_class_key child_key;
-
- lockdep_set_class(qdisc_lock(q), &child_key);
-}
-
static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
@@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
return -ENOMEM;
}
- htb_set_lockdep_class_child(qdisc);
q->direct_qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
}
if (q->offload) {
- htb_set_lockdep_class_child(new);
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
qdisc_refcount_inc(new);
old_q = htb_graft_helper(dev_queue, new);
@@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
- if (q->offload) {
- if (new_q)
- htb_set_lockdep_class_child(new_q);
+ if (q->offload)
htb_parent_to_leaf_offload(sch, dev_queue, new_q);
- }
}
sch_tree_lock(sch);
@@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
classid, NULL);
if (q->offload) {
- if (new_q) {
- htb_set_lockdep_class_child(new_q);
- /* One ref for cl->leaf.q, the other for
- * dev_queue->qdisc.
- */
+ /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+ if (new_q)
qdisc_refcount_inc(new_q);
- }
old_q = htb_graft_helper(dev_queue, new_q);
/* No qdisc_put needed. */
WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
@@ -2166,6 +2150,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.dump = htb_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("htb");
static int __init htb_module_init(void)
{
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5fa9eaa79b..cc6051d4f2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
- tcx_miniq_set_active(entry, false);
+ tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
@@ -168,6 +168,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.ingress_block_get = ingress_ingress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ingress");
struct clsact_sched_data {
struct tcf_block *ingress_block;
@@ -256,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -275,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
@@ -301,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
- tcx_miniq_set_active(ingress_entry, false);
+ tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
@@ -309,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
- tcx_miniq_set_active(egress_entry, false);
+ tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
@@ -344,6 +345,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.egress_block_get = clsact_egress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("clsact");
static int __init ingress_module_init(void)
{
@@ -368,6 +370,5 @@ static void __exit ingress_module_exit(void)
module_init(ingress_module_init);
module_exit(ingress_module_exit);
-MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 43e53ee00a..51d4013b61 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -215,10 +215,8 @@ static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt,
for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
fp[tc] = priv->fp[tc];
- nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) {
- if (nla_type(n) != TCA_MQPRIO_TC_ENTRY)
- continue;
-
+ nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt,
+ nlattr_opt_len, rem) {
err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack);
if (err)
goto out;
@@ -774,6 +772,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
.dump = mqprio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("mqprio");
static int __init mqprio_module_init(void)
{
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index d66d5f0ec0..06e03f5cd7 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
- removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+ removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands),
GFP_KERNEL);
if (!removed)
return -ENOMEM;
@@ -395,6 +395,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.dump = multiq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("multiq");
static int __init multiq_module_init(void)
{
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fa678eb885..edc72962ae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1293,6 +1293,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.dump = netem_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("netem");
static int __init netem_module_init(void)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2da6250ec3..b3dcb845b3 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -156,36 +156,38 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
/* convert to pschedtime */
- q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ WRITE_ONCE(q->params.target,
+ PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
}
/* tupdate is in jiffies */
if (tb[TCA_PIE_TUPDATE])
- q->params.tupdate =
- usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+ WRITE_ONCE(q->params.tupdate,
+ usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])));
if (tb[TCA_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
- q->params.limit = limit;
- sch->limit = limit;
+ WRITE_ONCE(q->params.limit, limit);
+ WRITE_ONCE(sch->limit, limit);
}
if (tb[TCA_PIE_ALPHA])
- q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
+ WRITE_ONCE(q->params.alpha, nla_get_u32(tb[TCA_PIE_ALPHA]));
if (tb[TCA_PIE_BETA])
- q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
+ WRITE_ONCE(q->params.beta, nla_get_u32(tb[TCA_PIE_BETA]));
if (tb[TCA_PIE_ECN])
- q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
+ WRITE_ONCE(q->params.ecn, nla_get_u32(tb[TCA_PIE_ECN]));
if (tb[TCA_PIE_BYTEMODE])
- q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+ WRITE_ONCE(q->params.bytemode,
+ nla_get_u32(tb[TCA_PIE_BYTEMODE]));
if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
- q->params.dq_rate_estimator =
- nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+ WRITE_ONCE(q->params.dq_rate_estimator,
+ nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]));
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
@@ -469,17 +471,18 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
/* convert target from pschedtime to us */
if (nla_put_u32(skb, TCA_PIE_TARGET,
- ((u32)PSCHED_TICKS2NS(q->params.target)) /
+ ((u32)PSCHED_TICKS2NS(READ_ONCE(q->params.target))) /
NSEC_PER_USEC) ||
- nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_PIE_LIMIT, READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_PIE_TUPDATE,
- jiffies_to_usecs(q->params.tupdate)) ||
- nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
- nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
+ jiffies_to_usecs(READ_ONCE(q->params.tupdate))) ||
+ nla_put_u32(skb, TCA_PIE_ALPHA, READ_ONCE(q->params.alpha)) ||
+ nla_put_u32(skb, TCA_PIE_BETA, READ_ONCE(q->params.beta)) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
- nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+ nla_put_u32(skb, TCA_PIE_BYTEMODE,
+ READ_ONCE(q->params.bytemode)) ||
nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
- q->params.dq_rate_estimator))
+ READ_ONCE(q->params.dq_rate_estimator)))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -556,6 +559,7 @@ static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
.dump_stats = pie_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("pie");
static int __init pie_module_init(void)
{
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 992f0c8d79..cefb65201e 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -213,6 +213,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("plug");
static int __init plug_module_init(void)
{
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8ecdd3ef6f..cc30f7a32f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -418,6 +418,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.dump = prio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("prio");
static int __init prio_module_init(void)
{
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 48a604c320..d584c0c258 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1521,6 +1521,7 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
.destroy = qfq_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("qfq");
static int __init qfq_init(void)
{
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 607b6c8b3a..b5f096588f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -548,6 +548,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.dump_stats = red_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("red");
static int __init red_module_init(void)
{
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1871a1c022..b717e15a3a 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -709,6 +709,7 @@ static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
.dump_stats = sfb_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfb");
static int __init sfb_module_init(void)
{
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index eb77558fa3..3b9245a3c7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -608,6 +608,7 @@ static void sfq_perturbation(struct timer_list *t)
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
siphash_key_t nkey;
+ int period;
get_random_bytes(&nkey, sizeof(nkey));
rcu_read_lock();
@@ -618,8 +619,12 @@ static void sfq_perturbation(struct timer_list *t)
sfq_rehash(sch);
spin_unlock(root_lock);
- if (q->perturb_period)
- mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ /* q->perturb_period can change under us from
+ * sfq_change() and sfq_destroy().
+ */
+ period = READ_ONCE(q->perturb_period);
+ if (period)
+ mod_timer(&q->perturb_timer, jiffies + period);
rcu_read_unlock();
}
@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
q->quantum = ctl->quantum;
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
}
- q->perturb_period = ctl->perturb_period * HZ;
+ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- q->perturb_period = 0;
+ WRITE_ONCE(q->perturb_period, 0);
del_timer_sync(&q->perturb_timer);
sfq_free(q->ht);
sfq_free(q->slots);
@@ -925,6 +930,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.dump = sfq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfq");
static int __init sfq_module_init(void)
{
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 28beb11762..20ff7386b7 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -79,7 +79,9 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
prio = min(skb->priority, max_priority);
qdisc = &q->qdiscs[prio];
- if (sch->q.qlen < sch->limit) {
+
+ /* sch->limit can change under us from skbprio_change() */
+ if (sch->q.qlen < READ_ONCE(sch->limit)) {
__skb_queue_tail(qdisc, skb);
qdisc_qstats_backlog_inc(sch, skb);
q->qstats[prio].backlog += qdisc_pkt_len(skb);
@@ -172,7 +174,7 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
return -EINVAL;
- sch->limit = ctl->limit;
+ WRITE_ONCE(sch->limit, ctl->limit);
return 0;
}
@@ -200,7 +202,7 @@ static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tc_skbprio_qopt opt;
- opt.limit = sch->limit;
+ opt.limit = READ_ONCE(sch->limit);
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
return -1;
@@ -292,6 +294,7 @@ static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
.destroy = skbprio_destroy,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("skbprio");
static int __init skbprio_module_init(void)
{
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index ad99409c63..b284a06b5a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -40,6 +40,8 @@ static struct static_key_false taprio_have_working_mqprio;
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_SUPPORTED_FLAGS \
+ (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
@@ -408,19 +410,6 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
return entry;
}
-static bool taprio_flags_valid(u32 flags)
-{
- /* Make sure no other flag bits are set. */
- if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
- TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- /* txtime-assist and full offload are mutually exclusive */
- if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
- (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- return true;
-}
-
/* This returns the tstamp value set by TCP in terms of the set clock. */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
@@ -1032,7 +1021,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
- [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_FLAGS] =
+ NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
};
@@ -1161,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- if (!cycle) {
- NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
- return -EINVAL;
- }
-
if (cycle < 0 || cycle > INT_MAX) {
NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
return -EINVAL;
@@ -1174,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
new->cycle_time = cycle;
}
+ if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
+ return -EINVAL;
+ }
+
taprio_calculate_gate_durations(q, new);
return 0;
@@ -1186,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
{
bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
- if (!qopt && !dev->num_tc) {
- NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
- return -EINVAL;
- }
-
- /* If num_tc is already set, it means that the user already
- * configured the mqprio part
- */
- if (dev->num_tc)
+ if (!qopt) {
+ if (!dev->num_tc) {
+ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+ return -EINVAL;
+ }
return 0;
+ }
/* taprio imposes that traffic classes map 1:n to tx queues */
if (qopt->num_tc > dev->num_tx_queues) {
@@ -1762,10 +1749,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
fp[tc] = q->fp[tc];
}
- nla_for_each_nested(n, opt, rem) {
- if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
- continue;
-
+ nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) {
err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
extack);
if (err)
@@ -1816,33 +1800,6 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}
-/* The semantics of the 'flags' argument in relation to 'change()'
- * requests, are interpreted following two rules (which are applied in
- * this order): (1) an omitted 'flags' argument is interpreted as
- * zero; (2) the 'flags' of a "running" taprio instance cannot be
- * changed.
- */
-static int taprio_new_flags(const struct nlattr *attr, u32 old,
- struct netlink_ext_ack *extack)
-{
- u32 new = 0;
-
- if (attr)
- new = nla_get_u32(attr);
-
- if (old != TAPRIO_FLAGS_INVALID && old != new) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- }
-
- if (!taprio_flags_valid(new)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
-
- return new;
-}
-
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1853,6 +1810,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
unsigned long flags;
+ u32 taprio_flags;
ktime_t start;
int i, err;
@@ -1864,12 +1822,31 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
- q->flags, extack);
- if (err < 0)
- return err;
+ /* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+ taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;
- q->flags = err;
+ /* txtime-assist and full offload are mutually exclusive */
+ if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+ (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
+ "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
+ return -EINVAL;
+ }
+
+ if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+ q->flags = taprio_flags;
+
+ /* Needed for length_to_duration() during netlink attribute parsing */
+ taprio_set_picos_per_byte(dev, q);
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
@@ -1930,7 +1907,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
goto free_sched;
- taprio_set_picos_per_byte(dev, q);
taprio_update_queue_max_sdu(q, new_admin, stab);
if (FULL_OFFLOAD_IS_ENABLED(q->flags))
@@ -2549,6 +2525,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.dump_stats = taprio_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("taprio");
static struct notifier_block taprio_device_notifier = {
.notifier_call = taprio_dev_notifier,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dd6b1a723b..f1d09183ae 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -608,6 +608,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.dump = tbf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("tbf");
static int __init tbf_module_init(void)
{
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 59304611dc..8badec6d82 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
- if (q->q.qlen < dev->tx_queue_len) {
+ if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
__skb_queue_tail(&q->q, skb);
return NET_XMIT_SUCCESS;
}
@@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
} while ((q = NEXT_SLAVE(q)) != m->slaves);
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index eb05131ff1..23359e5222 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -507,6 +507,7 @@ done:
}
static const struct inet_diag_handler sctp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = sctp_diag_dump,
.dump_one = sctp_diag_dump_one,
.idiag_get_info = sctp_diag_get_info,
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 17fcaa9b0d..a8a254a500 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
struct sock *sk = ep->base.sk;
struct net *net = sock_net(sk);
struct sctp_hashbucket *head;
+ int err = 0;
ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
head = &sctp_ep_hashtable[ep->hashent];
+ write_lock(&head->lock);
if (sk->sk_reuseport) {
bool any = sctp_is_ep_boundall(sk);
struct sctp_endpoint *ep2;
struct list_head *list;
- int cnt = 0, err = 1;
+ int cnt = 0;
+
+ err = 1;
list_for_each(list, &ep->base.bind_addr.address_list)
cnt++;
@@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
if (!err) {
err = reuseport_add_sock(sk, sk2, any);
if (err)
- return err;
+ goto out;
break;
} else if (err < 0) {
- return err;
+ goto out;
}
}
if (err) {
err = reuseport_alloc(sk, any);
if (err)
- return err;
+ goto out;
}
}
- write_lock(&head->lock);
hlist_add_head(&ep->node, &head->chain);
+out:
write_unlock(&head->lock);
- return 0;
+ return err;
}
/* Add an endpoint to the hash. Local BH-safe. */
@@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
head = &sctp_ep_hashtable[ep->hashent];
+ write_lock(&head->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
-
- write_lock(&head->lock);
hlist_del_init(&ep->node);
write_unlock(&head->lock);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 24368f755a..f7b809c0d1 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -415,7 +415,7 @@ out:
if (!IS_ERR_OR_NULL(dst)) {
struct rt6_info *rt;
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
t->dst_cookie = rt6_get_cookie(rt);
pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 94c6dd53cd..5a7436a13b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
struct flowi *fl)
{
union sctp_addr *saddr = &t->saddr;
- struct rtable *rt = (struct rtable *)t->dst;
+ struct rtable *rt = dst_rtable(t->dst);
if (rt) {
saddr->v4.sin_family = AF_INET;
@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_reset_inner_mac_header(skb);
skb_reset_inner_transport_header(skb);
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
- udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
+ udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
sctp_sk(sk)->udp_port, t->encap_port, false, false);
return 0;
@@ -1495,17 +1495,11 @@ static __init int sctp_init(void)
/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
- sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
- sizeof(struct sctp_bind_bucket),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_bucket_cachep = KMEM_CACHE(sctp_bind_bucket, SLAB_HWCACHE_ALIGN);
if (!sctp_bucket_cachep)
goto out;
- sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
- sizeof(struct sctp_chunk),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_chunk_cachep = KMEM_CACHE(sctp_chunk, SLAB_HWCACHE_ALIGN);
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 08fdf1251f..5adf0c0a6c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -38,6 +38,7 @@
#include <linux/inet.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <net/inet_ecn.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b9fcdb095..c009383369 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -67,6 +67,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
+#include <net/rps.h>
/* Forward declarations for internal helper functions. */
static bool sctp_writeable(const struct sock *sk);
@@ -4846,7 +4847,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
* descriptor will be returned from accept() to represent the newly
* formed association.
*/
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
+static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct sctp_sock *sp;
struct sctp_endpoint *ep;
@@ -4870,7 +4871,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
goto out;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
error = sctp_wait_for_accept(sk, timeo);
if (error)
@@ -4881,7 +4882,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
*/
asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
- newsk = sp->pf->create_accept_sk(sk, asoc, kern);
+ newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern);
if (!newsk) {
error = -ENOMEM;
goto out;
@@ -4898,7 +4899,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
out:
release_sock(sk);
- *err = error;
+ arg->err = error;
return newsk;
}
@@ -7118,6 +7119,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
struct sctp_assoc_ids *ids;
+ size_t ids_size;
u32 num = 0;
if (sctp_style(sk, TCP))
@@ -7130,11 +7132,11 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
num++;
}
- if (len < sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num)
+ ids_size = struct_size(ids, gaids_assoc_id, num);
+ if (len < ids_size)
return -EINVAL;
- len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
-
+ len = ids_size;
ids = kmalloc(len, GFP_USER | __GFP_NOWARN);
if (unlikely(!ids))
return -ENOMEM;
@@ -9275,7 +9277,7 @@ void sctp_data_ready(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index f65d6f92af..61c6f3027e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -80,8 +80,6 @@ static struct ctl_table sctp_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-
- { /* sentinel */ }
};
/* The following index defines are used in sctp_sysctl_net_register().
@@ -384,8 +382,6 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &pf_expose_max,
},
-
- { /* sentinel */ }
};
static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
@@ -597,6 +593,7 @@ static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
int sctp_sysctl_net_register(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(sctp_net_table);
struct ctl_table *table;
int i;
@@ -604,7 +601,7 @@ int sctp_sysctl_net_register(struct net *net)
if (!table)
return -ENOMEM;
- for (i = 0; table[i].data; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max;
@@ -613,8 +610,7 @@ int sctp_sysctl_net_register(struct net *net)
table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp",
- table,
- ARRAY_SIZE(sctp_net_table));
+ table, table_size);
if (net->sctp.sysctl_header == NULL) {
kfree(table);
return -ENOMEM;
@@ -624,7 +620,7 @@ int sctp_sysctl_net_register(struct net *net)
void sctp_sysctl_net_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->sctp.sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->sctp.sysctl_header);
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 746be39967..ba5e6a2dd2 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -20,3 +20,16 @@ config SMC_DIAG
smcss.
if unsure, say Y.
+
+config SMC_LO
+ bool "SMC intra-OS shortcut with loopback-ism"
+ depends on SMC
+ default n
+ help
+ SMC_LO enables the creation of an Emulated-ISM device named
+ loopback-ism in SMC and makes use of it for transferring data
+ when communication occurs within the same OS. This helps in
+ convenient testing of SMC-D since loopback-ism is independent
+ of architecture or hardware.
+
+ if unsure, say N.
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 875efcd126..2c510d5430 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
+smc-$(CONFIG_SMC_LO) += smc_loopback.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0f53a5c6fd..c5f98c6b25 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -53,6 +53,7 @@
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
+#include "smc_loopback.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -177,7 +178,7 @@ static struct smc_hashinfo smc_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
-int smc_hash_sk(struct sock *sk)
+static int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
struct hlist_head *head;
@@ -191,9 +192,8 @@ int smc_hash_sk(struct sock *sk)
return 0;
}
-EXPORT_SYMBOL_GPL(smc_hash_sk);
-void smc_unhash_sk(struct sock *sk)
+static void smc_unhash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -202,7 +202,6 @@ void smc_unhash_sk(struct sock *sk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(&h->lock);
}
-EXPORT_SYMBOL_GPL(smc_unhash_sk);
/* This will be called before user really release sock_lock. So do the
* work which we didn't do because of user hold the sock_lock in the
@@ -460,29 +459,11 @@ out:
static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
unsigned long mask)
{
- struct net *nnet = sock_net(nsk);
-
nsk->sk_userlocks = osk->sk_userlocks;
- if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
nsk->sk_sndbuf = osk->sk_sndbuf;
- } else {
- if (mask == SK_FLAGS_SMC_TO_CLC)
- WRITE_ONCE(nsk->sk_sndbuf,
- READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
- else
- WRITE_ONCE(nsk->sk_sndbuf,
- 2 * READ_ONCE(nnet->smc.sysctl_wmem));
- }
- if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
nsk->sk_rcvbuf = osk->sk_rcvbuf;
- } else {
- if (mask == SK_FLAGS_SMC_TO_CLC)
- WRITE_ONCE(nsk->sk_rcvbuf,
- READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
- else
- WRITE_ONCE(nsk->sk_rcvbuf,
- 2 * READ_ONCE(nnet->smc.sysctl_rmem));
- }
}
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
@@ -1046,7 +1027,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
int i = 1, entry = 1;
- bool is_virtual;
+ bool is_emulated;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
@@ -1058,12 +1039,12 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
- is_virtual = __smc_ism_is_virtual(chid);
+ is_emulated = __smc_ism_is_emulated(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
- if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
/* It's the last GID-CHID entry left in CLC
- * Proposal SMC-Dv2 extension, but a virtual
+ * Proposal SMC-Dv2 extension, but an Emulated-
* ISM device will take two entries. So give
* up it and try the next potential ISM device.
*/
@@ -1073,7 +1054,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
ini->is_smcd = true;
rc = 0;
i++;
- entry = is_virtual ? entry + 2 : entry + 1;
+ entry = is_emulated ? entry + 2 : entry + 1;
if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
@@ -1414,10 +1395,10 @@ static int smc_connect_ism(struct smc_sock *smc,
if (rc)
return rc;
- if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
ini->ism_peer_gid[ini->ism_selected].gid_ext =
ntohll(aclc->d1.gid_ext);
- /* for non-virtual ISM devices, peer gid_ext remains 0. */
+ /* for non-Emulated-ISM devices, peer gid_ext remains 0. */
}
ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
@@ -1437,6 +1418,14 @@ static int smc_connect_ism(struct smc_sock *smc,
}
smc_conn_save_peer_info(smc, aclc);
+
+ if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(smc);
+ if (rc) {
+ rc = SMC_CLC_DECL_MEM; /* try to fallback */
+ goto connect_abort;
+ }
+ }
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -2118,10 +2107,10 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
- if (__smc_ism_is_virtual(proposed_chid))
+ if (__smc_ism_is_emulated(proposed_chid))
ini->ism_peer_gid[*matches].gid_ext =
proposed_gid->gid_ext;
- /* non-virtual ISM's peer gid_ext remains 0. */
+ /* non-Emulated-ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
@@ -2171,10 +2160,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
smcd_gid.gid_ext = 0;
chid = ntohs(smcd_v2_ext->gidchid[i].chid);
- if (__smc_ism_is_virtual(chid)) {
+ if (__smc_ism_is_emulated(chid)) {
if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
- /* each virtual ISM device takes two GID-CHID
+ /* each Emulated-ISM device takes two GID-CHID
* entries and CHID of the second entry repeats
* that of the first entry.
*
@@ -2541,6 +2530,14 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, cclc);
+
+ if (ini->is_smcd &&
+ smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(new_smc);
+ if (rc)
+ goto out_decl;
+ }
+
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
@@ -2674,7 +2671,7 @@ out:
}
static int smc_accept(struct socket *sock, struct socket *new_sock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *nsk;
DECLARE_WAITQUEUE(wait, current);
@@ -2693,7 +2690,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
}
/* Wait for an incoming connection */
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
add_wait_queue_exclusive(sk_sleep(sk), &wait);
while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -2720,7 +2717,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
if (rc)
goto out;
- if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
+ if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
/* wait till data arrives on the socket */
timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
MSEC_PER_SEC);
@@ -3557,15 +3554,23 @@ static int __init smc_init(void)
goto out_sock;
}
+ rc = smc_loopback_init();
+ if (rc) {
+ pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
+ goto out_ib;
+ }
+
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_ib;
+ goto out_lo;
}
static_branch_enable(&tcp_have_smc);
return 0;
+out_lo:
+ smc_loopback_exit();
out_ib:
smc_ib_unregister_client();
out_sock:
@@ -3603,6 +3608,7 @@ static void __exit smc_exit(void)
tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
+ smc_loopback_exit();
smc_ib_unregister_client();
smc_ism_exit();
destroy_workqueue(smc_close_wq);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index df64efd2de..18c8b78701 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -56,11 +56,11 @@ enum smc_state { /* possible states of an SMC socket */
};
enum smc_supplemental_features {
- SMC_SPF_VIRT_ISM_DEV = 0,
+ SMC_SPF_EMULATED_ISM_DEV = 0,
};
#define SMC_FEATURE_MASK \
- (BIT(SMC_SPF_VIRT_ISM_DEV))
+ (BIT(SMC_SPF_EMULATED_ISM_DEV))
struct smc_link_group;
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3c06625ceb..619b3bab38 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -18,6 +18,7 @@
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
+#include "smc_ism.h"
/********************************** send *************************************/
@@ -255,6 +256,14 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
return rc;
smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ /* if local sndbuf shares the same memory region with
+ * peer DMB, then don't update the tx_curs_fin
+ * and sndbuf_space until peer has consumed the data.
+ */
+ return 0;
+
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -266,7 +275,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn);
smc_tx_sndbuf_nonfull(smc);
- return rc;
+ return 0;
}
/********************************* receive ***********************************/
@@ -323,7 +332,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
{
union smc_host_cursor cons_old, prod_old;
struct smc_connection *conn = &smc->conn;
- int diff_cons, diff_prod;
+ int diff_cons, diff_prod, diff_tx;
smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn);
smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);
@@ -339,6 +348,29 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
atomic_add(diff_cons, &conn->peer_rmbe_space);
/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
smp_mb__after_atomic();
+
+ /* if local sndbuf shares the same memory region with
+ * peer RMB, then update tx_curs_fin and sndbuf_space
+ * here since peer has already consumed the data.
+ */
+ if (conn->lgr->is_smcd &&
+ smc_ism_support_dmb_nocopy(conn->lgr->smcd)) {
+ /* Calculate consumed data and
+ * increment free send buffer space.
+ */
+ diff_tx = smc_curs_diff(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons);
+ /* increase local sndbuf space and fin_curs */
+ smp_mb__before_atomic();
+ atomic_add(diff_tx, &conn->sndbuf_space);
+ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
+ smp_mb__after_atomic();
+ smc_curs_copy(&conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons, conn);
+
+ smc_tx_sndbuf_nonfull(smc);
+ }
}
diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 9a13709bea..33fa787c28 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -853,8 +853,10 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_smcd = &pclc->pclc_smcd;
pclc_prfx = &pclc->pclc_prfx;
ipv6_prfx = pclc->pclc_prfx_ipv6;
- v2_ext = &pclc->pclc_v2_ext;
- smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
+ v2_ext = container_of(&pclc->pclc_v2_ext,
+ struct smc_clc_v2_extension, fixed);
+ smcd_v2_ext = container_of(&pclc->pclc_smcd_v2_ext,
+ struct smc_clc_smcd_v2_extension, fixed);
gidchids = pclc->pclc_gidchids;
trl = &pclc->pclc_trl;
@@ -952,8 +954,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
gidchids[entry].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
gidchids[entry].gid = htonll(smcd_gid.gid);
- if (smc_ism_is_virtual(smcd)) {
- /* a virtual ISM device takes two
+ if (smc_ism_is_emulated(smcd)) {
+ /* an Emulated-ISM device takes two
* entries. CHID of the second entry
* repeats that of the first entry.
*/
@@ -1055,7 +1057,7 @@ smcd_clc_prep_confirm_accept(struct smc_connection *conn,
clc->d1.chid = htons(chid);
if (eid && eid[0])
memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
- if (__smc_ism_is_virtual(chid))
+ if (__smc_ism_is_emulated(chid))
clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index a9f9bdd26d..467effb50c 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -134,12 +134,15 @@ struct smc_clc_smcd_gid_chid {
*/
struct smc_clc_v2_extension {
- struct smc_clnt_opts_area_hdr hdr;
- u8 roce[16]; /* RoCEv2 GID */
- u8 max_conns;
- u8 max_links;
- __be16 feature_mask;
- u8 reserved[12];
+ /* New members must be added within the struct_group() macro below. */
+ struct_group_tagged(smc_clc_v2_extension_fixed, fixed,
+ struct smc_clnt_opts_area_hdr hdr;
+ u8 roce[16]; /* RoCEv2 GID */
+ u8 max_conns;
+ u8 max_links;
+ __be16 feature_mask;
+ u8 reserved[12];
+ );
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -159,8 +162,11 @@ struct smc_clc_msg_smcd { /* SMC-D GID information */
};
struct smc_clc_smcd_v2_extension {
- u8 system_eid[SMC_MAX_EID_LEN];
- u8 reserved[16];
+ /* New members must be added within the struct_group() macro below. */
+ struct_group_tagged(smc_clc_smcd_v2_extension_fixed, fixed,
+ u8 system_eid[SMC_MAX_EID_LEN];
+ u8 reserved[16];
+ );
struct smc_clc_smcd_gid_chid gidchid[];
};
@@ -175,7 +181,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC
* proposal SMC-Dv2 extension.
* each ISM device takes one entry and
- * each virtual ISM takes two entries.
+ * each Emulated-ISM takes two entries
*/
struct smc_clc_msg_proposal_area {
@@ -183,9 +189,9 @@ struct smc_clc_msg_proposal_area {
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
- struct smc_clc_v2_extension pclc_v2_ext;
+ struct smc_clc_v2_extension_fixed pclc_v2_ext;
u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN];
- struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
+ struct smc_clc_smcd_v2_extension_fixed pclc_smcd_v2_ext;
struct smc_clc_smcd_gid_chid
pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES];
struct smc_clc_msg_trail pclc_trl;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index e4c8584112..acca3b1a06 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1149,6 +1149,20 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
}
}
+static void smcd_buf_detach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+
+ if (!conn->sndbuf_desc)
+ return;
+
+ smc_ism_detach_dmb(smcd, peer_token);
+
+ kfree(conn->sndbuf_desc);
+ conn->sndbuf_desc = NULL;
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
@@ -1192,6 +1206,8 @@ void smc_conn_free(struct smc_connection *conn)
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(lgr->smcd))
+ smcd_buf_detach(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_wait_pend_tx_wr(conn);
@@ -1445,6 +1461,8 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
smc_sk_wake_ups(smc);
if (conn->lgr->is_smcd) {
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ smcd_buf_detach(conn);
if (soft)
tasklet_kill(&conn->rx_tsklet);
else
@@ -1535,7 +1553,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
if ((!peer_gid->gid ||
(lgr->peer_gid.gid == peer_gid->gid &&
- !smc_ism_is_virtual(dev) ? 1 :
+ !smc_ism_is_emulated(dev) ? 1 :
lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
if (peer_gid->gid) /* peer triggered termination */
@@ -1881,7 +1899,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
lgr->smcd != smcismdev)
return false;
- if (smc_ism_is_virtual(smcismdev) &&
+ if (smc_ism_is_emulated(smcismdev) &&
lgr->peer_gid.gid_ext != peer_gid->gid_ext)
return false;
@@ -1997,7 +2015,6 @@ out:
*/
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
- const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
u8 compressed;
if (size <= SMC_BUF_MIN_SIZE)
@@ -2007,9 +2024,11 @@ static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
compressed = min_t(u8, ilog2(size) + 1,
is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
if (!is_smcd && is_rmb)
/* RMBs are backed by & limited to max size of scatterlists */
- compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+ compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
+#endif
return compressed;
}
@@ -2464,12 +2483,18 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
int rc;
/* create send buffer */
+ if (is_smcd &&
+ smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
+ goto create_rmb;
+
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
return rc;
+
+create_rmb:
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
- if (rc) {
+ if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
up_write(&smc->conn.lgr->sndbufs_lock);
@@ -2479,6 +2504,41 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
}
+int smcd_buf_attach(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+ struct smc_buf_desc *buf_desc;
+ int rc;
+
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return -ENOMEM;
+
+ /* The ghost sndbuf_desc describes the same memory region as
+ * peer RMB. Its lifecycle is consistent with the connection's
+ * and it will be freed with the connections instead of the
+ * link group.
+ */
+ rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+ if (rc)
+ goto free;
+
+ smc->sk.sk_sndbuf = buf_desc->len;
+ buf_desc->cpu_addr =
+ (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+ buf_desc->len -= sizeof(struct smcd_cdc_msg);
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_desc->used = 1;
+ atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+ return 0;
+
+free:
+ kfree(buf_desc);
+ return rc;
+}
+
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1f17537603..d93cf51dbd 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -557,6 +557,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_sock *smc);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 5a33908015..6fdb2d9677 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -255,6 +255,7 @@ static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler smc_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_SMC,
.dump = smc_diag_handler_dump,
};
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 97704a9e84..9297dc20bf 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
- goto out;
- neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
- if (neigh) {
- memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
- *uses_gateway = rt->rt_uses_gateway;
- return 0;
- }
+ goto out_rt;
+ neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr);
+ if (!neigh)
+ goto out_rt;
+ memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+ *uses_gateway = rt->rt_uses_gateway;
+ neigh_release(neigh);
+ ip_rt_put(rt);
+ return 0;
+
+out_rt:
+ ip_rt_put(rt);
out:
return -ENOENT;
}
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index ac88de2a06..84f98e18c7 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -91,6 +91,11 @@ bool smc_ism_is_v2_capable(void)
return smc_ism_v2_capable;
}
+void smc_ism_set_v2_capable(void)
+{
+ smc_ism_v2_capable = true;
+}
+
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
@@ -126,6 +131,8 @@ int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->add_vlan_id)
+ return -EOPNOTSUPP;
/* create new vlan entry, in case we need it */
new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
@@ -171,6 +178,8 @@ int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->del_vlan_id)
+ return -EOPNOTSUPP;
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
@@ -222,7 +231,6 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
-#if IS_ENABLED(CONFIG_ISM)
struct smcd_dmb dmb;
int rc;
@@ -231,7 +239,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid.gid;
- rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
+ rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, lgr->smcd->client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
@@ -240,9 +248,46 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb_desc->len = dmb.dmb_len;
}
return rc;
-#else
- return 0;
-#endif
+}
+
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+ /* for now only loopback-ism supports
+ * merging sndbuf with peer DMB to avoid
+ * data copies between them.
+ */
+ return (smcd->ops->support_dmb_nocopy &&
+ smcd->ops->support_dmb_nocopy(smcd));
+}
+
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc)
+{
+ struct smcd_dmb dmb;
+ int rc = 0;
+
+ if (!dev->ops->attach_dmb)
+ return -EINVAL;
+
+ memset(&dmb, 0, sizeof(dmb));
+ dmb.dmb_tok = token;
+ rc = dev->ops->attach_dmb(dev, &dmb);
+ if (!rc) {
+ dmb_desc->sba_idx = dmb.sba_idx;
+ dmb_desc->token = dmb.dmb_tok;
+ dmb_desc->cpu_addr = dmb.cpu_addr;
+ dmb_desc->dma_addr = dmb.dma_addr;
+ dmb_desc->len = dmb.dmb_len;
+ }
+ return rc;
+}
+
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token)
+{
+ if (!dev->ops->detach_dmb)
+ return -EINVAL;
+
+ return dev->ops->detach_dmb(dev, token);
}
static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
@@ -322,6 +367,8 @@ static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list,
list_for_each_entry(smcd, &dev_list->list, list) {
if (num < snum)
goto next;
+ if (smc_ism_is_loopback(smcd))
+ goto next;
if (smc_nl_handle_smcd_dev(smcd, skb, cb))
goto errout;
next:
@@ -372,7 +419,8 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id);
break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- if (ev_info.code == ISM_EVENT_REQUEST) {
+ if (ev_info.code == ISM_EVENT_REQUEST &&
+ wrk->smcd->ops->signal_event) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
&peer_gid,
@@ -436,7 +484,7 @@ static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
static void smcd_register_dev(struct ism_dev *ism)
{
const struct smcd_ops *ops = ism_get_smcd_ops();
- struct smcd_dev *smcd;
+ struct smcd_dev *smcd, *fentry;
if (!ops)
return;
@@ -446,20 +494,28 @@ static void smcd_register_dev(struct ism_dev *ism)
if (!smcd)
return;
smcd->priv = ism;
+ smcd->client = &smc_ism_client;
ism_set_priv(ism, &smc_ism_client, smcd);
if (smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid))
smc_pnetid_by_table_smcd(smcd);
+ if (smcd->ops->supports_v2())
+ smc_ism_set_v2_capable();
mutex_lock(&smcd_dev_list.mutex);
- if (list_empty(&smcd_dev_list.list)) {
- if (smcd->ops->supports_v2())
- smc_ism_v2_capable = true;
- }
- /* sort list: devices without pnetid before devices with pnetid */
- if (smcd->pnetid[0])
+ /* sort list:
+ * - devices without pnetid before devices with pnetid;
+ * - loopback-ism always at the very beginning;
+ */
+ if (!smcd->pnetid[0]) {
+ fentry = list_first_entry_or_null(&smcd_dev_list.list,
+ struct smcd_dev, list);
+ if (fentry && smc_ism_is_loopback(fentry))
+ list_add(&smcd->list, &fentry->list);
+ else
+ list_add(&smcd->list, &smcd_dev_list.list);
+ } else {
list_add_tail(&smcd->list, &smcd_dev_list.list);
- else
- list_add(&smcd->list, &smcd_dev_list.list);
+ }
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
@@ -541,6 +597,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
if (lgr->peer_shutdown)
return 0;
+ if (!lgr->smcd->ops->signal_event)
+ return 0;
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index ffff40c30a..6763133dd8 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -15,7 +15,7 @@
#include "smc.h"
-#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00
+#define SMC_EMULATED_ISM_CHID_MASK 0xFF00
#define SMC_ISM_IDENT_MASK 0x00FFFF
struct smcd_dev_list { /* List of SMCD devices */
@@ -48,10 +48,15 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd);
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc);
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
+void smc_ism_set_v2_capable(void);
int smc_ism_init(void);
void smc_ism_exit(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
@@ -66,10 +71,10 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
return rc < 0 ? rc : 0;
}
-static inline bool __smc_ism_is_virtual(u16 chid)
+static inline bool __smc_ism_is_emulated(u16 chid)
{
/* CHIDs in range of 0xFF00 to 0xFFFF are reserved
- * for virtual ISM device.
+ * for Emulated-ISM device.
*
* loopback-ism: 0xFFFF
* virtio-ism: 0xFF00 ~ 0xFFFE
@@ -77,11 +82,16 @@ static inline bool __smc_ism_is_virtual(u16 chid)
return ((chid & 0xFF00) == 0xFF00);
}
-static inline bool smc_ism_is_virtual(struct smcd_dev *smcd)
+static inline bool smc_ism_is_emulated(struct smcd_dev *smcd)
{
u16 chid = smcd->ops->get_chid(smcd);
- return __smc_ism_is_virtual(chid);
+ return __smc_ism_is_emulated(chid);
+}
+
+static inline bool smc_ism_is_loopback(struct smcd_dev *smcd)
+{
+ return (smcd->ops->get_chid(smcd) == 0xFFFF);
}
#endif
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
new file mode 100644
index 0000000000..3c5f64ca41
--- /dev/null
+++ b/net/smc/smc_loopback.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * Functions for loopback-ism device.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/types.h>
+#include <net/smc.h>
+
+#include "smc_cdc.h"
+#include "smc_ism.h"
+#include "smc_loopback.h"
+
+#define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */
+#define SMC_LO_SUPPORT_NOCOPY 0x1
+#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0)
+
+static const char smc_lo_dev_name[] = "loopback-ism";
+static struct smc_lo_dev *lo_dev;
+
+static void smc_lo_generate_ids(struct smc_lo_dev *ldev)
+{
+ struct smcd_gid *lgid = &ldev->local_gid;
+ uuid_t uuid;
+
+ uuid_gen(&uuid);
+ memcpy(&lgid->gid, &uuid, sizeof(lgid->gid));
+ memcpy(&lgid->gid_ext, (u8 *)&uuid + sizeof(lgid->gid),
+ sizeof(lgid->gid_ext));
+
+ ldev->chid = SMC_LO_RESERVED_CHID;
+}
+
+static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid,
+ u32 vid_valid, u32 vid)
+{
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* rgid should be the same as lgid */
+ if (!ldev || rgid->gid != ldev->local_gid.gid ||
+ rgid->gid_ext != ldev->local_gid.gid_ext)
+ return -ENETUNREACH;
+ return 0;
+}
+
+static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
+ void *client_priv)
+{
+ struct smc_lo_dmb_node *dmb_node, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+ int sba_idx, rc;
+
+ /* check space for new dmb */
+ for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
+ if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask))
+ break;
+ }
+ if (sba_idx == SMC_LO_MAX_DMBS)
+ return -ENOSPC;
+
+ dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL);
+ if (!dmb_node) {
+ rc = -ENOMEM;
+ goto err_bit;
+ }
+
+ dmb_node->sba_idx = sba_idx;
+ dmb_node->len = dmb->dmb_len;
+ dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL |
+ __GFP_NOWARN | __GFP_NORETRY |
+ __GFP_NOMEMALLOC);
+ if (!dmb_node->cpu_addr) {
+ rc = -ENOMEM;
+ goto err_node;
+ }
+ dmb_node->dma_addr = SMC_DMA_ADDR_INVALID;
+ refcount_set(&dmb_node->refcnt, 1);
+
+again:
+ /* add new dmb into hash table */
+ get_random_bytes(&dmb_node->token, sizeof(dmb_node->token));
+ write_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) {
+ if (tmp_node->token == dmb_node->token) {
+ write_unlock_bh(&ldev->dmb_ht_lock);
+ goto again;
+ }
+ }
+ hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token);
+ write_unlock_bh(&ldev->dmb_ht_lock);
+ atomic_inc(&ldev->dmb_cnt);
+
+ dmb->sba_idx = dmb_node->sba_idx;
+ dmb->dmb_tok = dmb_node->token;
+ dmb->cpu_addr = dmb_node->cpu_addr;
+ dmb->dma_addr = dmb_node->dma_addr;
+ dmb->dmb_len = dmb_node->len;
+
+ return 0;
+
+err_node:
+ kfree(dmb_node);
+err_bit:
+ clear_bit(sba_idx, ldev->sba_idx_mask);
+ return rc;
+}
+
+static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev,
+ struct smc_lo_dmb_node *dmb_node)
+{
+ /* remove dmb from hash table */
+ write_lock_bh(&ldev->dmb_ht_lock);
+ hash_del(&dmb_node->list);
+ write_unlock_bh(&ldev->dmb_ht_lock);
+
+ clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
+ kvfree(dmb_node->cpu_addr);
+ kfree(dmb_node);
+
+ if (atomic_dec_and_test(&ldev->dmb_cnt))
+ wake_up(&ldev->ldev_release);
+}
+
+static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb from hash table */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+ if (tmp_node->token == dmb->dmb_tok) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (refcount_dec_and_test(&dmb_node->refcnt))
+ __smc_lo_unregister_dmb(ldev, dmb_node);
+ return 0;
+}
+
+static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+ return SMC_LO_SUPPORT_NOCOPY;
+}
+
+static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb_node according to dmb->dmb_tok */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+ if (tmp_node->token == dmb->dmb_tok) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (!refcount_inc_not_zero(&dmb_node->refcnt))
+ /* the dmb is being unregistered, but has
+ * not been removed from the hash table.
+ */
+ return -EINVAL;
+
+ /* provide dmb information */
+ dmb->sba_idx = dmb_node->sba_idx;
+ dmb->dmb_tok = dmb_node->token;
+ dmb->cpu_addr = dmb_node->cpu_addr;
+ dmb->dma_addr = dmb_node->dma_addr;
+ dmb->dmb_len = dmb_node->len;
+ return 0;
+}
+
+static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token)
+{
+ struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ /* find dmb_node according to dmb->dmb_tok */
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) {
+ if (tmp_node->token == token) {
+ dmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!dmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ if (refcount_dec_and_test(&dmb_node->refcnt))
+ __smc_lo_unregister_dmb(ldev, dmb_node);
+ return 0;
+}
+
+static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
+ unsigned int idx, bool sf, unsigned int offset,
+ void *data, unsigned int size)
+{
+ struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
+ struct smc_lo_dev *ldev = smcd->priv;
+ struct smc_connection *conn;
+
+ if (!sf)
+ /* since sndbuf is merged with peer DMB, there is
+ * no need to copy data from sndbuf to peer DMB.
+ */
+ return 0;
+
+ read_lock_bh(&ldev->dmb_ht_lock);
+ hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
+ if (tmp_node->token == dmb_tok) {
+ rmb_node = tmp_node;
+ break;
+ }
+ }
+ if (!rmb_node) {
+ read_unlock_bh(&ldev->dmb_ht_lock);
+ return -EINVAL;
+ }
+ memcpy((char *)rmb_node->cpu_addr + offset, data, size);
+ read_unlock_bh(&ldev->dmb_ht_lock);
+
+ conn = smcd->conn[rmb_node->sba_idx];
+ if (!conn || conn->killed)
+ return -EPIPE;
+ tasklet_schedule(&conn->rx_tsklet);
+ return 0;
+}
+
+static int smc_lo_supports_v2(void)
+{
+ return SMC_LO_V2_CAPABLE;
+}
+
+static void smc_lo_get_local_gid(struct smcd_dev *smcd,
+ struct smcd_gid *smcd_gid)
+{
+ struct smc_lo_dev *ldev = smcd->priv;
+
+ smcd_gid->gid = ldev->local_gid.gid;
+ smcd_gid->gid_ext = ldev->local_gid.gid_ext;
+}
+
+static u16 smc_lo_get_chid(struct smcd_dev *smcd)
+{
+ return ((struct smc_lo_dev *)smcd->priv)->chid;
+}
+
+static struct device *smc_lo_get_dev(struct smcd_dev *smcd)
+{
+ return &((struct smc_lo_dev *)smcd->priv)->dev;
+}
+
+static const struct smcd_ops lo_ops = {
+ .query_remote_gid = smc_lo_query_rgid,
+ .register_dmb = smc_lo_register_dmb,
+ .unregister_dmb = smc_lo_unregister_dmb,
+ .support_dmb_nocopy = smc_lo_support_dmb_nocopy,
+ .attach_dmb = smc_lo_attach_dmb,
+ .detach_dmb = smc_lo_detach_dmb,
+ .add_vlan_id = NULL,
+ .del_vlan_id = NULL,
+ .set_vlan_required = NULL,
+ .reset_vlan_required = NULL,
+ .signal_event = NULL,
+ .move_data = smc_lo_move_data,
+ .supports_v2 = smc_lo_supports_v2,
+ .get_local_gid = smc_lo_get_local_gid,
+ .get_chid = smc_lo_get_chid,
+ .get_dev = smc_lo_get_dev,
+};
+
+static struct smcd_dev *smcd_lo_alloc_dev(const struct smcd_ops *ops,
+ int max_dmbs)
+{
+ struct smcd_dev *smcd;
+
+ smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+ if (!smcd)
+ return NULL;
+
+ smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+ GFP_KERNEL);
+ if (!smcd->conn)
+ goto out_smcd;
+
+ smcd->ops = ops;
+
+ spin_lock_init(&smcd->lock);
+ spin_lock_init(&smcd->lgr_lock);
+ INIT_LIST_HEAD(&smcd->vlan);
+ INIT_LIST_HEAD(&smcd->lgr_list);
+ init_waitqueue_head(&smcd->lgrs_deleted);
+ return smcd;
+
+out_smcd:
+ kfree(smcd);
+ return NULL;
+}
+
+static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
+{
+ struct smcd_dev *smcd;
+
+ smcd = smcd_lo_alloc_dev(&lo_ops, SMC_LO_MAX_DMBS);
+ if (!smcd)
+ return -ENOMEM;
+ ldev->smcd = smcd;
+ smcd->priv = ldev;
+ smc_ism_set_v2_capable();
+ mutex_lock(&smcd_dev_list.mutex);
+ list_add(&smcd->list, &smcd_dev_list.list);
+ mutex_unlock(&smcd_dev_list.mutex);
+ pr_warn_ratelimited("smc: adding smcd device %s\n",
+ dev_name(&ldev->dev));
+ return 0;
+}
+
+static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
+{
+ struct smcd_dev *smcd = ldev->smcd;
+
+ pr_warn_ratelimited("smc: removing smcd device %s\n",
+ dev_name(&ldev->dev));
+ smcd->going_away = 1;
+ smc_smcd_terminate_all(smcd);
+ mutex_lock(&smcd_dev_list.mutex);
+ list_del_init(&smcd->list);
+ mutex_unlock(&smcd_dev_list.mutex);
+ kfree(smcd->conn);
+ kfree(smcd);
+}
+
+static int smc_lo_dev_init(struct smc_lo_dev *ldev)
+{
+ smc_lo_generate_ids(ldev);
+ rwlock_init(&ldev->dmb_ht_lock);
+ hash_init(ldev->dmb_ht);
+ atomic_set(&ldev->dmb_cnt, 0);
+ init_waitqueue_head(&ldev->ldev_release);
+
+ return smcd_lo_register_dev(ldev);
+}
+
+static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
+{
+ smcd_lo_unregister_dev(ldev);
+ if (atomic_read(&ldev->dmb_cnt))
+ wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
+}
+
+static void smc_lo_dev_release(struct device *dev)
+{
+ struct smc_lo_dev *ldev =
+ container_of(dev, struct smc_lo_dev, dev);
+
+ kfree(ldev);
+}
+
+static int smc_lo_dev_probe(void)
+{
+ struct smc_lo_dev *ldev;
+ int ret;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return -ENOMEM;
+
+ ldev->dev.parent = NULL;
+ ldev->dev.release = smc_lo_dev_release;
+ device_initialize(&ldev->dev);
+ dev_set_name(&ldev->dev, smc_lo_dev_name);
+
+ ret = smc_lo_dev_init(ldev);
+ if (ret)
+ goto free_dev;
+
+ lo_dev = ldev; /* global loopback device */
+ return 0;
+
+free_dev:
+ put_device(&ldev->dev);
+ return ret;
+}
+
+static void smc_lo_dev_remove(void)
+{
+ if (!lo_dev)
+ return;
+
+ smc_lo_dev_exit(lo_dev);
+ put_device(&lo_dev->dev); /* device_initialize in smc_lo_dev_probe */
+}
+
+int smc_loopback_init(void)
+{
+ return smc_lo_dev_probe();
+}
+
+void smc_loopback_exit(void)
+{
+ smc_lo_dev_remove();
+}
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
new file mode 100644
index 0000000000..6dd4292dae
--- /dev/null
+++ b/net/smc/smc_loopback.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * SMC-D loopback-ism device structure definitions.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_LOOPBACK_H
+#define _SMC_LOOPBACK_H
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <net/smc.h>
+
+#if IS_ENABLED(CONFIG_SMC_LO)
+#define SMC_LO_MAX_DMBS 5000
+#define SMC_LO_DMBS_HASH_BITS 12
+#define SMC_LO_RESERVED_CHID 0xFFFF
+
+struct smc_lo_dmb_node {
+ struct hlist_node list;
+ u64 token;
+ u32 len;
+ u32 sba_idx;
+ void *cpu_addr;
+ dma_addr_t dma_addr;
+ refcount_t refcnt;
+};
+
+struct smc_lo_dev {
+ struct smcd_dev *smcd;
+ struct device dev;
+ u16 chid;
+ struct smcd_gid local_gid;
+ atomic_t dmb_cnt;
+ rwlock_t dmb_ht_lock;
+ DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
+ DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
+ wait_queue_head_t ldev_release;
+};
+
+int smc_loopback_init(void);
+void smc_loopback_exit(void);
+#else
+static inline int smc_loopback_init(void)
+{
+ return 0;
+}
+
+static inline void smc_loopback_exit(void)
+{
+}
+#endif
+
+#endif /* _SMC_LOOPBACK_H */
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 9a2f3638d1..f0cbe77a80 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -42,10 +42,10 @@ static void smc_rx_wake_up(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_HUP);
rcu_read_unlock();
}
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index 9d32058db2..e19177ce40 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -19,7 +19,7 @@
#include "smc_clc.h"
-#define SMC_MAX_FBACK_RSN_CNT 30
+#define SMC_MAX_FBACK_RSN_CNT 36
enum {
SMC_BUF_8K,
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index a5946d1b9d..13f2bc092d 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,11 +90,11 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
- { }
};
int __net_init smc_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(smc_table);
struct ctl_table *table;
table = smc_table;
@@ -105,12 +105,12 @@ int __net_init smc_sysctl_net_init(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (void *)net - (void *)&init_net;
}
net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table,
- ARRAY_SIZE(smc_table));
+ table_size);
if (!net->smc.smc_hdr)
goto err_reg;
@@ -133,7 +133,7 @@ err_alloc:
void __net_exit smc_sysctl_net_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->smc.smc_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->smc.smc_hdr);
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index 9fc5e586d2..a9a6e3c111 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
__entry->smc = smc;
__entry->net_cookie = sock_net(sk)->net_cookie;
__entry->len = len;
- __assign_str(name, smc->conn.lnk->ibname);
+ __assign_str(name);
),
TP_printk("smc=%p net=%llu len=%zu dev=%s",
@@ -104,7 +104,7 @@ TRACE_EVENT(smcr_link_down,
__entry->lgr = lgr;
__entry->net_cookie = lgr->net->net_cookie;
__entry->state = lnk->state;
- __assign_str(name, lnk->ibname);
+ __assign_str(name);
__entry->location = location;
),
diff --git a/net/socket.c b/net/socket.c
index ed3df2f749..e416920e93 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,7 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
-#include <linux/io_uring.h>
+#include <linux/io_uring/net.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -343,7 +343,7 @@ static void init_inodecache(void)
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
BUG_ON(sock_inode_cachep == NULL);
}
@@ -1890,7 +1890,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-struct file *do_accept(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, struct proto_accept_arg *arg,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
@@ -1926,8 +1926,8 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
- false);
+ arg->flags |= sock->file->f_flags;
+ err = ops->accept(sock, newsock, arg);
if (err < 0)
goto out_fd;
@@ -1953,6 +1953,7 @@ out_fd:
static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
+ struct proto_accept_arg arg = { };
struct file *newfile;
int newfd;
@@ -1966,7 +1967,7 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s
if (unlikely(newfd < 0))
return newfd;
- newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
+ newfile = do_accept(file, &arg, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
@@ -2600,9 +2601,9 @@ out:
return err;
}
-int sendmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct iovec **iov)
+static int sendmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct iovec **iov)
{
int err;
@@ -2753,10 +2754,10 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
}
-int recvmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct sockaddr __user **uaddr,
- struct iovec **iov)
+static int recvmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct sockaddr __user **uaddr,
+ struct iovec **iov)
{
ssize_t err;
@@ -3580,6 +3581,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
const struct proto_ops *ops = READ_ONCE(sock->ops);
+ struct proto_accept_arg arg = {
+ .flags = flags,
+ .kern = true,
+ };
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3587,7 +3592,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, &arg);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c7af0220f8..369310909f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
- if (unlikely(snd_buf->len > snd_buf->buflen))
+ if (unlikely(snd_buf->len > snd_buf->buflen)) {
+ status = -EIO;
goto wrap_failed;
+ }
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index c53b329092..4ebc1b7043 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len)
}
static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest)
{
const void *q;
unsigned int len;
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_KERNEL);
+ dest->data = kmemdup_noprof(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
@@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
dest->len = len;
return q;
}
+
+#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__))
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 06d8ee0db0..4eb19c3a54 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -168,7 +168,7 @@ static int krb5_DK(const struct gss_krb5_enctype *gk5e,
goto err_return;
blocksize = crypto_sync_skcipher_blocksize(cipher);
if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
- goto err_return;
+ goto err_free_cipher;
ret = -ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 24de941847..73a90ad873 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1033,17 +1033,11 @@ null_verifier:
static void gss_free_in_token_pages(struct gssp_in_token *in_token)
{
- u32 inlen;
int i;
i = 0;
- inlen = in_token->page_len;
- while (inlen) {
- if (in_token->pages[i])
- put_page(in_token->pages[i]);
- inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen;
- }
-
+ while (in_token->pages[i])
+ put_page(in_token->pages[i++]);
kfree(in_token->pages);
in_token->pages = NULL;
}
@@ -1075,7 +1069,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
goto out_denied_free;
pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
- in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
+ in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
if (!in_token->pages)
goto out_denied_free;
in_token->page_base = 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cda0935a68..09f29a95f2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -405,7 +405,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
- clnt->cl_stats = program->stats;
+ clnt->cl_stats = args->stats ? : program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
@@ -691,6 +691,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -713,6 +714,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.version = clnt->cl_vers,
.authflavor = flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -1068,6 +1070,8 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
+ .stats = old->cl_stats,
+ .timeout = old->cl_timeout,
};
struct rpc_clnt *clnt;
int err;
@@ -2322,12 +2326,13 @@ call_transmit_status(struct rpc_task *task)
task->tk_action = call_transmit;
task->tk_status = 0;
break;
- case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
+ break;
+ case -ECONNREFUSED:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
@@ -2695,8 +2700,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
goto out_msg_denied;
error = rpcauth_checkverf(task, xdr);
- if (error)
+ if (error) {
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ rpcauth_invalcred(task);
+ if (!task->tk_cred_retry)
+ goto out_err;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ return -EKEYREJECTED;
+ }
goto out_verifier;
+ }
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index dcc2b4f49e..910a5d850d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1490,7 +1490,7 @@ int register_rpc_pipefs(void)
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6debf4fd42..cef623ea15 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
- } else
+ } else {
+ smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
+ }
}
/*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 65fc1297c6..383860cb1d 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
- return do_register(net, statp->program->pg_name, statp, proc_ops);
+ return do_register(net, statp->program->pg_name, net, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b969e505c7..d9cda1e53a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -451,8 +451,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- int (*threadfn)(void *data))
+__svc_create(struct svc_program *prog, struct svc_stat *stats,
+ unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -463,7 +463,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_stats = prog->pg_stats;
+ serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
@@ -529,26 +529,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, 1, threadfn);
+ return __svc_create(prog, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
* @prog: the RPC program the new service will handle
+ * @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
*
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, threadfn);
+ serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
@@ -1263,8 +1265,6 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_proc >= versp->vs_nproc)
goto err_bad_proc;
rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
- if (!procp)
- goto err_bad_proc;
/* Initialize storage for argp and resp */
memset(rqstp->rq_argp, 0, procp->pc_argzero);
@@ -1375,7 +1375,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto err_bad_proc;
/* Syntactic check complete */
- serv->sv_stats->rpccnt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
aoffset = xdr_stream_pos(xdr);
@@ -1427,7 +1428,8 @@ err_short_len:
goto close_xprt;
err_bad_rpc:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
xdr_stream_encode_u32(xdr, RPC_MISMATCH);
/* Only RPCv2 supported */
@@ -1438,7 +1440,8 @@ err_bad_rpc:
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
- serv->sv_stats->rpcbadauth++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of reply status: */
xdr_truncate_encode(xdr, XDR_UNIT * 2);
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
@@ -1448,7 +1451,8 @@ err_bad_auth:
err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
@@ -1456,7 +1460,8 @@ err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_mismatch;
/*
@@ -1470,19 +1475,22 @@ err_bad_vers:
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n");
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit;
err_system_err:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_system_err;
goto sendit;
}
@@ -1534,7 +1542,8 @@ void svc_process(struct svc_rqst *rqstp)
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
be32_to_cpu(*p));
- rqstp->rq_server->sv_stats->rpcbadfmt++;
+ if (rqstp->rq_server->sv_stats)
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
}
@@ -1548,9 +1557,11 @@ out_drop:
*/
void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
+ struct rpc_timeout timeout = {
+ .to_increment = 0,
+ };
struct rpc_task *task;
int proc_error;
- struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
@@ -1603,6 +1614,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
timeout.to_initval = req->rq_xprt->timeout->to_initval;
timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ timeout.to_maxval = timeout.to_initval;
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
@@ -1612,7 +1624,6 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
rpc_put_task(task);
}
-EXPORT_SYMBOL_GPL(svc_process_bc);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4a85a227b..dd86d7f1e9 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -46,7 +46,6 @@ static LIST_HEAD(svc_xprt_class_list);
/* SMP locking strategy:
*
- * svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* The "service mutex" protects svc_serv->sv_nrthread.
@@ -211,51 +210,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
-static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
- struct svc_serv *serv,
- struct net *net,
- const int family,
- const unsigned short port,
- int flags)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_ANY),
- .sin_port = htons(port),
- };
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = htons(port),
- };
-#endif
- struct svc_xprt *xprt;
- struct sockaddr *sap;
- size_t len;
-
- switch (family) {
- case PF_INET:
- sap = (struct sockaddr *)&sin;
- len = sizeof(sin);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case PF_INET6:
- sap = (struct sockaddr *)&sin6;
- len = sizeof(sin6);
- break;
-#endif
- default:
- return ERR_PTR(-EAFNOSUPPORT);
- }
-
- xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
- if (IS_ERR(xprt))
- trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, len, xprt);
- return xprt;
-}
-
/**
* svc_xprt_received - start next receiver thread
* @xprt: controlling transport
@@ -294,9 +248,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
}
static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags,
- const struct cred *cred)
+ struct net *net, struct sockaddr *sap,
+ size_t len, int flags, const struct cred *cred)
{
struct svc_xprt_class *xcl;
@@ -312,8 +265,11 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
+ newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(newxprt)) {
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, len,
+ newxprt);
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
}
@@ -330,6 +286,48 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
}
/**
+ * svc_xprt_create_from_sa - Add a new listener to @serv from socket address
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @sap: socket address pointer
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
+ */
+int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, struct sockaddr *sap,
+ int flags, const struct cred *cred)
+{
+ size_t len;
+ int err;
+
+ switch (sap->sa_family) {
+ case AF_INET:
+ len = sizeof(struct sockaddr_in);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, cred);
+ if (err == -EPROTONOSUPPORT) {
+ request_module("svc%s", xprt_name);
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags,
+ cred);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa);
+
+/**
* svc_xprt_create - Add a new listener to @serv
* @serv: target RPC service
* @xprt_name: transport class name
@@ -339,23 +337,41 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
* @flags: SVC_SOCK flags
* @cred: credential to bind to this transport
*
- * Return values:
- * %0: New listener added successfully
- * %-EPROTONOSUPPORT: Requested transport type not supported
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
*/
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
- int err;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+#endif
+ struct sockaddr *sap;
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
- if (err == -EPROTONOSUPPORT) {
- request_module("svc%s", xprt_name);
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ switch (family) {
+ case PF_INET:
+ sap = (struct sockaddr *)&sin;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case PF_INET6:
+ sap = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
}
- return err;
+
+ return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred);
}
EXPORT_SYMBOL_GPL(svc_xprt_create);
@@ -1260,6 +1276,40 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
}
/**
+ * svc_find_listener - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
+ * @sa: sockaddr containing address
+ *
+ * Return the transport instance pointer for the endpoint accepting
+ * connections/peer traffic from the specified transport class,
+ * and matching sockaddr.
+ */
+struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name,
+ struct net *net, const struct sockaddr *sa)
+{
+ struct svc_xprt *xprt;
+ struct svc_xprt *found = NULL;
+
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+ if (xprt->xpt_net != net)
+ continue;
+ if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
+ continue;
+ if (!rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local))
+ continue;
+ found = xprt;
+ svc_xprt_get(xprt);
+ break;
+ }
+ spin_unlock_bh(&serv->sv_lock);
+ return found;
+}
+EXPORT_SYMBOL_GPL(svc_find_listener);
+
+/**
* svc_find_xprt - find an RPC transport instance
* @serv: pointer to svc_serv to search
* @xcl_name: C string containing transport's class name
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 93941ab125..5f3170a1c9 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -160,7 +160,6 @@ static struct ctl_table debug_table[] = {
.mode = 0444,
.proc_handler = proc_do_xprt,
},
- { }
};
void
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index af13fdfa66..09f245cda5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1398,6 +1398,12 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
return;
if (!list_empty(&req->rq_xmit)) {
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (list_is_first(&req->rq_xmit, &xprt->xmit_queue) &&
+ xprt->ops->abort_send_request)
+ xprt->ops->abort_send_request(req);
+
list_del(&req->rq_xmit);
if (!list_empty(&req->rq_xmit2)) {
struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
@@ -1541,6 +1547,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
int is_retrans = RPC_WAS_SENT(task);
int status;
+ if (test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ return -ENOTCONN;
+
if (!req->rq_bytes_sent) {
if (xprt_request_data_received(task)) {
status = 0;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ffbf998949..47f33bb7bf 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
-/* frwr_reset - Place MRs back on the free list
+/**
+ * frwr_reset - Place MRs back on @req's free list
* @req: request to reset
*
* Used after a failed marshal. For FRWR, this means the MRs
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index f86970733e..474f7a98fe 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -209,7 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = {
.extra1 = &zero,
.extra2 = &zero,
},
- { },
};
static void svc_rdma_proc_cleanup(void)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index c9be677864..e5a78b7610 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index c00fcce61d..40797114d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -197,28 +197,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
-/* State for sending a Write or Reply chunk.
- * - Tracks progress of writing one chunk over all its segments
- * - Stores arguments for the SGL constructor functions
- */
-struct svc_rdma_write_info {
- struct svcxprt_rdma *wi_rdma;
-
- const struct svc_rdma_chunk *wi_chunk;
-
- /* write state of this chunk */
- unsigned int wi_seg_off;
- unsigned int wi_seg_no;
-
- /* SGL constructor arguments */
- const struct xdr_buf *wi_xdr;
- unsigned char *wi_base;
- unsigned int wi_next_off;
-
- struct svc_rdma_chunk_ctxt wi_cc;
- struct work_struct wi_work;
-};
-
static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk)
@@ -253,6 +231,49 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
+ * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
+
+ if (!cc->cc_sqecount)
+ return;
+ svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
+}
+
+/**
+ * svc_rdma_reply_done - Reply chunk Write completion handler
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion report
+ *
+ * Pages under I/O are released by a subsequent Send completion.
+ */
+static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_chunk_ctxt *cc =
+ container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svcxprt_rdma *rdma = cq->cq_context;
+
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ trace_svcrdma_wc_reply(&cc->cc_cid);
+ return;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
+ }
+
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+}
+
+/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
* @wc: Work Completion
@@ -580,41 +601,33 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-/**
- * svc_rdma_send_write_chunk - Write all segments in a Write chunk
- * @rdma: controlling RDMA transport
- * @chunk: Write chunk provided by the client
- * @xdr: xdr_buf containing the data payload
- *
- * Returns a non-negative number of bytes the chunk consumed, or
- * %-E2BIG if the payload was larger than the Write chunk,
- * %-EINVAL if client provided too many segments,
- * %-ENOMEM if rdma_rw context pool was exhausted,
- * %-ENOTCONN if posting failed (connection is lost),
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
- */
-int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
+ struct xdr_buf payload;
int ret;
+ if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
+ chunk->ch_payload_length))
+ return -EMSGSIZE;
+
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
- ret = svc_rdma_xb_write(xdr, info);
- if (ret != xdr->len)
+ ret = svc_rdma_xb_write(&payload, info);
+ if (ret != payload.len)
goto out_err;
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
- return xdr->len;
+ return 0;
out_err:
svc_rdma_write_info_free(info);
@@ -622,9 +635,37 @@ out_err:
}
/**
- * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * svc_rdma_send_write_list - Send all chunks on the Write list
* @rdma: controlling RDMA transport
- * @rctxt: Write and Reply chunks from client
+ * @rctxt: Write list provisioned by the client
+ * @xdr: xdr_buf containing an RPC Reply message
+ *
+ * Returns zero on success, or a negative errno if one or more
+ * Write chunks could not be sent.
+ */
+int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr)
+{
+ struct svc_rdma_chunk *chunk;
+ int ret;
+
+ pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
+ if (!chunk->ch_payload_length)
+ break;
+ ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply
*
* Returns a non-negative number of bytes the chunk consumed, or
@@ -634,39 +675,45 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
-int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
- struct svc_rdma_chunk *chunk;
+ struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
+ struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+ struct ib_send_wr *first_wr;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
- if (pcl_is_empty(&rctxt->rc_reply_pcl))
- return 0;
-
- chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
- info = svc_rdma_write_info_alloc(rdma, chunk);
- if (!info)
- return -ENOMEM;
- cc = &info->wi_cc;
+ info->wi_rdma = rdma;
+ info->wi_chunk = pcl_first_chunk(reply_pcl);
+ info->wi_seg_off = 0;
+ info->wi_seg_no = 0;
+ info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_write, info);
if (ret < 0)
- goto out_err;
+ return ret;
- trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
- goto out_err;
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
- return xdr->len;
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
-out_err:
- svc_rdma_write_info_free(info);
- return ret;
+ trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return xdr->len;
}
/**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1a49b7f020..bb5436b719 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -205,9 +205,13 @@ out:
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
+ svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
+ ctxt->sc_wr_chain = &ctxt->sc_send_wr;
+ ctxt->sc_sqecount = 1;
+
return ctxt;
out_empty:
@@ -223,6 +227,8 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_reply_chunk_release(rdma, ctxt);
+
if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count);
@@ -293,7 +299,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- svc_rdma_wake_send_waiters(rdma, 1);
+ svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
@@ -312,51 +318,76 @@ flushed:
}
/**
- * svc_rdma_send - Post a single Send WR
- * @rdma: transport on which to post the WR
- * @ctxt: send ctxt with a Send WR ready to post
+ * svc_rdma_post_send - Post a WR chain to the Send Queue
+ * @rdma: transport context
+ * @ctxt: WR chain to post
+ *
+ * Copy fields in @ctxt to stack variables in order to guarantee
+ * that these values remain available after the ib_post_send() call.
+ * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
+ *
+ * Note there is potential for starvation when the Send Queue is
+ * full because there is no order to when waiting threads are
+ * awoken. The transport is typically provisioned with a deep
+ * enough Send Queue that SQ exhaustion should be a rare event.
*
- * Returns zero if the Send WR was posted successfully. Otherwise, a
- * negative errno is returned.
+ * Return values:
+ * %0: @ctxt's WR chain was posted successfully
+ * %-ENOTCONN: The connection was lost
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
+int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
- struct ib_send_wr *wr = &ctxt->sc_send_wr;
- int ret;
+ struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
+ struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
+ const struct ib_send_wr *bad_wr = first_wr;
+ struct rpc_rdma_cid cid = ctxt->sc_cid;
+ int ret, sqecount = ctxt->sc_sqecount;
might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
- wr->sg_list[0].addr,
- wr->sg_list[0].length,
+ send_wr->sg_list[0].addr,
+ send_wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */
- while (1) {
- if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
+ while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
+ if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+
+ /* When the transport is torn down, assume
+ * ib_drain_sq() will trigger enough Send
+ * completions to wake us. The XPT_CLOSE test
+ * above should then cause the while loop to
+ * exit.
+ */
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
- atomic_inc(&rdma->sc_sq_avail);
+ trace_svcrdma_sq_full(rdma, &cid);
wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > 1);
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
+ atomic_read(&rdma->sc_sq_avail) > 0);
+ trace_svcrdma_sq_retry(rdma, &cid);
continue;
}
trace_svcrdma_post_send(ctxt);
- ret = ib_post_send(rdma->sc_qp, wr, NULL);
- if (ret)
- break;
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret) {
+ trace_svcrdma_sq_post_err(rdma, &cid, ret);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ /* If even one WR was posted, there will be a
+ * Send completion that bumps sc_sq_avail.
+ */
+ if (bad_wr == first_wr) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+ break;
+ }
+ }
return 0;
}
-
- trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
- wake_up(&rdma->sc_send_wait);
- return ret;
+ return -ENOTCONN;
}
/**
@@ -839,16 +870,10 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
- * the server may send all, a portion of, or none of the xdr_buf.
+ * the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
- * RDMA Send is the last step of transmitting an RPC reply. Pages
- * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the sctxt's page array. These pages are
- * DMA unmapped by each Write completion, but the subsequent Send
- * completion finally releases these pages.
- *
* Assumptions:
* - The Reply's transport header will never be larger than a page.
*/
@@ -857,6 +882,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp)
{
+ struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret;
ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
@@ -864,16 +890,19 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
+ /* Transfer pages involved in RDMA Writes to the sctxt's
+ * page array. Completion handling releases these pages.
+ */
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
- sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
- sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
- sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ send_wr->opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/**
@@ -937,7 +966,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
- if (svc_rdma_send(rdma, sctxt))
+ if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
return;
@@ -984,10 +1013,19 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
- goto reply_chunk;
- rc_size = ret;
+ goto put_ctxt;
+
+ rc_size = 0;
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
+ ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, sctxt,
+ &rqstp->rq_res);
+ if (ret < 0)
+ goto reply_chunk;
+ rc_size = ret;
+ }
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
@@ -1030,45 +1068,33 @@ drop_connection:
/**
* svc_rdma_result_payload - special processing for a result payload
- * @rqstp: svc_rqst to operate on
- * @offset: payload's byte offset in @xdr
+ * @rqstp: RPC transaction context
+ * @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes
*
+ * Assign the passed-in result payload to the current Write chunk,
+ * and advance rc_cur_result_payload to the next Write chunk, if
+ * there is one.
+ *
* Return values:
* %0 if successful or nothing needed to be done
- * %-EMSGSIZE on XDR buffer overflow
* %-E2BIG if the payload was larger than the Write chunk
- * %-EINVAL if client provided too many segments
- * %-ENOMEM if rdma_rw context pool was exhausted
- * %-ENOTCONN if posting failed (connection is lost)
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc)
*/
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk;
- struct svcxprt_rdma *rdma;
- struct xdr_buf subbuf;
- int ret;
chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk)
return 0;
rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
+
if (length > chunk->ch_length)
return -E2BIG;
-
chunk->ch_position = offset;
chunk->ch_payload_length = length;
-
- if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
- return -EMSGSIZE;
-
- rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
- if (ret < 0)
- return ret;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4f27325ace..2b1c16b954 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -415,15 +415,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
- newxprt->sc_recv_batch;
+ newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) {
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
- ctxts *= newxprt->sc_max_requests;
+
+ /* Arbitrarily estimate the number of rw_ctxs needed for
+ * this transport. This is enough rw_ctxs to make forward
+ * progress even if the client is using one rkey per page
+ * in each Read chunk.
+ */
+ ctxts = 3 * RPCSVC_MAXPAGES;
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@@ -460,12 +465,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
-
+ dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
+ newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
+ newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 29b0562d62..9a8ce5df83 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -137,7 +137,6 @@ static struct ctl_table xr_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
#endif
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4f8d7efa46..a0b071089e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
pr_info("rpcrdma: removing device %s for %pISpc\n",
ep->re_id->device->name, sap);
- fallthrough;
+ switch (xchg(&ep->re_connect_status, -ENODEV)) {
+ case 0: goto wake_connect_worker;
+ case 1: goto disconnected;
+ }
+ return 0;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
@@ -893,6 +897,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
+ struct rpcrdma_mr *mr;
+
/* Credits are valid for only one connection */
req->rl_slot.rq_cong = 0;
@@ -902,7 +908,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
- frwr_reset(req);
+ /* The verbs consumer can't know the state of an MR on the
+ * req->rl_registered list unless a successful completion
+ * has occurred, so these MRs cannot be re-used.
+ */
+ while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+ struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&mr->mr_all);
+ spin_unlock(&buf->rb_lock);
+
+ frwr_mr_release(mr);
+ }
}
/* ASSUMPTION: the rb_allreqs list is stable for the duration,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 58f3dc8d0d..0e1691316f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -62,6 +62,7 @@
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
+static void xs_reset_srcport(struct sock_xprt *transport);
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -159,7 +160,6 @@ static struct ctl_table xs_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
/*
@@ -883,6 +883,17 @@ static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
+static void xs_stream_abort_send_request(struct rpc_rqst *req)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+
+ if (transport->xmit.offset != 0 &&
+ !test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ xprt_force_disconnect(xprt);
+}
+
/*
* Determine if the previous message in the stream was aborted before it
* could complete transmission.
@@ -1565,8 +1576,10 @@ static void xs_tcp_state_change(struct sock *sk)
break;
case TCP_CLOSE:
if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
- &transport->sock_state))
+ &transport->sock_state)) {
+ xs_reset_srcport(transport);
xprt_clear_connecting(xprt);
+ }
clear_bit(XPRT_CLOSING, &xprt->state);
/* Trigger the socket release */
xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
@@ -1722,6 +1735,11 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
xs_update_peer_port(xprt);
}
+static void xs_reset_srcport(struct sock_xprt *transport)
+{
+ transport->srcport = 0;
+}
+
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
{
if (transport->srcport == 0 && transport->xprt.reuseport)
@@ -2423,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
@@ -2645,6 +2670,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
.xprtsec = {
.policy = RPC_XPRTSEC_NONE,
},
+ .stats = upper_clnt->cl_stats,
};
unsigned int pflags = current->flags;
struct rpc_clnt *lower_clnt;
@@ -2987,20 +3013,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len;
}
-/*
- * The close routine. Since this is client initiated, we do nothing
- */
-
static void bc_close(struct rpc_xprt *xprt)
{
xprt_disconnect_done(xprt);
}
-/*
- * The xprt destroy routine. Again, because this connection is client
- * initiated, we do nothing
- */
-
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);
@@ -3021,6 +3038,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
+ .abort_send_request = xs_stream_abort_send_request,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_close,
.destroy = xs_destroy,
@@ -3068,6 +3086,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
+ .abort_send_request = xs_stream_abort_send_request,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index c9189a970e..6488ead9e4 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -244,6 +244,99 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
return 0;
}
+static void switchdev_obj_id_to_helpful_msg(struct net_device *dev,
+ enum switchdev_obj_id obj_id,
+ int err, bool add)
+{
+ const char *action = add ? "add" : "del";
+ const char *reason = "";
+ const char *problem;
+ const char *obj_str;
+
+ switch (obj_id) {
+ case SWITCHDEV_OBJ_ID_UNDEFINED:
+ obj_str = "Undefined object";
+ problem = "Attempted operation is undefined, indicating a possible programming\n"
+ "error.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ obj_str = "VLAN entry";
+ problem = "Failure in VLAN settings on this port might disrupt network\n"
+ "segmentation or traffic isolation, affecting network partitioning.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ obj_str = "Port Multicast Database entry";
+ problem = "Failure in updating the port's Multicast Database could lead to\n"
+ "multicast forwarding issues.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ obj_str = "Host Multicast Database entry";
+ problem = "Failure in updating the host's Multicast Database may impact multicast\n"
+ "group memberships or traffic delivery, affecting multicast\n"
+ "communication.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_MRP:
+ obj_str = "Media Redundancy Protocol configuration for port";
+ problem = "Failure to set MRP ring ID on this port prevents communication with\n"
+ "the specified redundancy ring, resulting in an inability to engage\n"
+ "in MRP-based network operations.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_TEST_MRP:
+ obj_str = "MRP Test Frame Operations for port";
+ problem = "Failure to generate/monitor MRP test frames may lead to inability to\n"
+ "assess the ring's operational integrity and fault response, hindering\n"
+ "proactive network management.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
+ obj_str = "MRP Ring Role Configuration";
+ problem = "Improper MRP ring role configuration may create conflicts in the ring,\n"
+ "disrupting communication for all participants, or isolate the local\n"
+ "system from the ring, hindering its ability to communicate with other\n"
+ "participants.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_RING_STATE_MRP:
+ obj_str = "MRP Ring State Configuration";
+ problem = "Failure to correctly set the MRP ring state can result in network\n"
+ "loops or leave segments without communication. In a Closed state,\n"
+ "it maintains loop prevention by blocking one MRM port, while an Open\n"
+ "state activates in response to failures, changing port states to\n"
+ "preserve network connectivity.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_TEST_MRP:
+ obj_str = "MRP_InTest Frame Generation Configuration";
+ problem = "Failure in managing MRP_InTest frame generation can misjudge the\n"
+ "interconnection ring's state, leading to incorrect blocking or\n"
+ "unblocking of the I/C port. This misconfiguration might result\n"
+ "in unintended network loops or isolate critical network segments,\n"
+ "compromising network integrity and reliability.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_ROLE_MRP:
+ obj_str = "Interconnection Ring Role Configuration";
+ problem = "Failure in incorrect assignment of interconnection ring roles\n"
+ "(MIM/MIC) can impair the formation of the interconnection rings.\n";
+ break;
+ case SWITCHDEV_OBJ_ID_IN_STATE_MRP:
+ obj_str = "Interconnection Ring State Configuration";
+ problem = "Failure in updating the interconnection ring state can lead in\n"
+ "case of Open state to incorrect blocking or unblocking of the\n"
+ "I/C port, resulting in unintended network loops or isolation\n"
+ "of critical network\n";
+ break;
+ default:
+ obj_str = "Unknown object";
+ problem = "Indicating a possible programming error.\n";
+ }
+
+ switch (err) {
+ case -ENOSPC:
+ reason = "Current HW/SW setup lacks sufficient resources.\n";
+ break;
+ }
+
+ netdev_err(dev, "Failed to %s %s (object id=%d) with error: %pe (%d).\n%s%s\n",
+ action, obj_str, obj_id, ERR_PTR(err), err, problem, reason);
+}
+
static void switchdev_port_obj_add_deferred(struct net_device *dev,
const void *data)
{
@@ -254,8 +347,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, true);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
@@ -304,8 +396,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
err = switchdev_port_obj_del_now(dev, obj);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, false);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 051ed5f6fc..f5017012a0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -40,7 +40,7 @@ static int is_seen(struct ctl_table_set *set)
/* Return standard mode bits for table entry. */
static int net_ctl_permissions(struct ctl_table_header *head,
- struct ctl_table *table)
+ const struct ctl_table *table)
{
struct net *net = container_of(head->set, struct net, sysctls);
@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head,
}
static void net_ctl_set_ownership(struct ctl_table_header *head,
- struct ctl_table *table,
kuid_t *uid, kgid_t *gid)
{
struct net *net = container_of(head->set, struct net, sysctls);
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index be1c4003d6..bb0d71eb02 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -32,16 +32,17 @@ config TIPC_MEDIA_UDP
bool "IP/UDP media type support"
depends on TIPC
select NET_UDP_TUNNEL
+ default y
help
Saying Y here will enable support for running TIPC over IP/UDP
- bool
- default y
+
config TIPC_CRYPTO
bool "TIPC encryption support"
depends on TIPC
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
+ default y
help
Saying Y here will enable support for TIPC encryption.
All TIPC messages will be encrypted/decrypted by using the currently most
@@ -49,8 +50,6 @@ config TIPC_CRYPTO
entering the TIPC stack.
Key setting from user-space is performed via netlink by a user program
(e.g. the iproute2 'tipc' tool).
- bool
- default y
config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index ee49a9f1dd..18e1636aa0 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -18,5 +18,5 @@ tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
-
-obj-$(CONFIG_TIPC_DIAG) += diag.o
+obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o
+tipc_diag-y += diag.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 878415c435..5a526ebafe 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1079,30 +1079,27 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
b = tipc_bearer_find(net, name);
if (!b) {
- rtnl_unlock();
NL_SET_ERR_MSG(info->extack, "Bearer not found");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
- rtnl_unlock();
NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
- if (err) {
- rtnl_unlock();
- return err;
- }
}
#endif
+out:
rtnl_unlock();
- return 0;
+ return err;
}
int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index 18733451c9..54dde8c4e4 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -95,6 +95,7 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
}
static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_TIPC,
.dump = tipc_sock_diag_handler_dump,
};
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 5c9fd4791c..76284fc538 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- *buf = NULL;
if (skb_has_frag_list(frag) && __skb_linearize(frag))
goto err;
+ *buf = NULL;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
@@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (!head)
goto err;
+ /* Either ownership of the input skb is transferred to headskb
+ * or the input skb is freed; clear the reference to avoid a
+ * bad access on the error path.
+ */
+ *buf = NULL;
if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
kfree_skb_partial(frag, headstolen);
} else {
@@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
*headbuf = NULL;
return 1;
}
- *buf = NULL;
return 0;
err:
kfree_skb(*buf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3105abe97b..500320e5ca 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -86,8 +86,6 @@ struct tipc_bclink_entry {
* @lock: rwlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
- * @inputq: pointer to input queue containing messages for msg event
- * @namedq: pointer to name table input queue with name table messages
* @active_links: bearer ids of active links, used as index into links[] array
* @links: array containing references to all links to node
* @bc_entry: broadcast link entry
@@ -2107,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
} else {
n = tipc_node_find_by_id(net, ehdr->id);
}
+ skb_dst_force(skb);
tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
if (!skb)
return;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index bb1118d02f..2d58ecae4e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -80,7 +80,6 @@ struct sockaddr_pair {
* @phdr: preformatted message header used when sending messages
* @cong_links: list of congested links
* @publications: list of publications for port
- * @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
* @conn_timeout: the time we can wait for an unresponded setup request
* @probe_unacked: probe has not received ack yet
@@ -147,8 +146,6 @@ static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern);
static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
@@ -2712,13 +2709,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* tipc_accept - wait for connection request
* @sock: listening socket
* @new_sock: new socket that is to be connected
- * @flags: file-related flags associated with socket
- * @kern: caused by kernel or by userspace?
+ * @arg: arguments for accept
*
* Return: 0 on success, errno otherwise
*/
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern)
+static int tipc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg)
{
struct sock *new_sk, *sk = sock->sk;
struct tipc_sock *new_tsock;
@@ -2734,14 +2730,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
res = -EINVAL;
goto exit;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
res = tipc_wait_for_accept(sock, timeo);
if (res)
goto exit;
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
@@ -3566,11 +3562,8 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
rhashtable_walk_start(iter);
while ((tsk = rhashtable_walk_next(iter)) != NULL) {
if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- if (err == -EAGAIN) {
- err = 0;
+ if (PTR_ERR(tsk) == -EAGAIN)
continue;
- }
break;
}
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 9fb65c988f..30d2e06e3d 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -91,7 +91,6 @@ static struct ctl_table tipc_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {}
};
int tipc_register_sysctl(void)
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 04af83f050..865142ed0a 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(tipc_skb_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_skb_dump(skb, more, __get_str(buf));
),
@@ -172,7 +172,7 @@ DECLARE_EVENT_CLASS(tipc_list_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_list_dump(list, more, __get_str(buf));
),
@@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(tipc_sk_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->portid = tipc_sock_get_portid(sk);
tipc_sk_dump(sk, dqueues, __get_str(buf));
if (skb)
@@ -254,7 +254,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -337,7 +337,7 @@ DECLARE_EVENT_CLASS(tipc_node_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->addr = tipc_node_get_addr(n);
tipc_node_dump(n, more, __get_str(buf));
),
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(tipc_fsm_class,
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->os = os;
__entry->ns = ns;
__entry->evt = evt;
@@ -409,8 +409,8 @@ TRACE_EVENT(tipc_l2_device_event,
),
TP_fast_assign(
- __assign_str(dev_name, dev->name);
- __assign_str(b_name, b->name);
+ __assign_str(dev_name);
+ __assign_str(b_name);
__entry->evt = evt;
__entry->b_up = test_bit(0, &b->up);
__entry->carrier = netif_carrier_ok(dev);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index f892b0903d..439f755399 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -135,8 +135,11 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port));
else if (ntohs(ua->proto) == ETH_P_IPV6)
snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port));
- else
+ else {
pr_err("Invalid UDP media address\n");
+ return 1;
+ }
+
return 0;
}
@@ -174,7 +177,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
local_bh_disable();
ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
- struct rtable *rt = (struct rtable *)ndst;
+ struct rtable *rt = dst_rtable(ndst);
if (!rt) {
struct flowi4 fl = {
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 0cdc1f7b6b..ce8d56a191 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -20,6 +20,7 @@ config TLS
config TLS_DEVICE
bool "Transport Layer Security HW offload"
depends on TLS
+ select SKB_DECRYPTED
select SOCK_VALIDATE_XMIT
select SOCK_RX_QUEUE_MAPPING
default n
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bf8ed36b1a..ab6e694f7b 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -37,6 +37,7 @@
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <linux/skbuff_ref.h>
#include "tls.h"
#include "trace.h"
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4e7228f275..f9e3d3d90d 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -33,6 +33,7 @@
#include <crypto/aead.h>
#include <crypto/scatterwalk.h>
#include <net/ip6_checksum.h>
+#include <linux/skbuff_ref.h>
#include "tls.h"
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b4674f03d7..90b7f253d3 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -816,9 +816,17 @@ struct tls_context *tls_ctx_create(struct sock *sk)
return NULL;
mutex_init(&ctx->tx_lock);
- rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = READ_ONCE(sk->sk_prot);
ctx->sk = sk;
+ /* Release semantic of rcu_assign_pointer() ensures that
+ * ctx->sk_proto is visible before changing sk->sk_prot in
+ * update_sk_prot(), and prevents reading uninitialized value in
+ * tls_{getsockopt, setsockopt}. Note that we do not need a
+ * read barrier in tls_{getsockopt,setsockopt} as there is an
+ * address dependency between sk->sk_proto->{getsockopt,setsockopt}
+ * and ctx->sk_proto.
+ */
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
return ctx;
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 5df08d848b..77e33e1e34 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <linux/workqueue.h>
#include <net/strparser.h>
#include <net/tcp.h>
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b783231668..305a412785 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2147,7 +2147,6 @@ recv_end:
if (ret) {
if (err >= 0 || err == -EINPROGRESS)
err = ret;
- decrypted = 0;
goto end;
}
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 28b232f281..8b5d04210d 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -16,11 +16,6 @@ config UNIX
Say Y unless you know what you are doing.
-config UNIX_SCM
- bool
- depends on UNIX
- default y
-
config AF_UNIX_OOB
bool
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 20491825b4..4ddd125c46 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
-
-obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 9df15a7bc2..be5266007b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,8 +118,6 @@
#include <linux/btf_ids.h>
#include <linux/bpf-cgroup.h>
-#include "scm.h"
-
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
@@ -223,15 +221,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}
-static inline int unix_recvq_full(const struct sock *sk)
-{
- return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
-}
-
static inline int unix_recvq_full_lockless(const struct sock *sk)
{
- return skb_queue_len_lockless(&sk->sk_receive_queue) >
- READ_ONCE(sk->sk_max_ack_backlog);
+ return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
struct sock *unix_peer_get(struct sock *s)
@@ -532,10 +524,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
return 0;
}
-static int unix_writable(const struct sock *sk)
+static int unix_writable(const struct sock *sk, unsigned char state)
{
- return sk->sk_state != TCP_LISTEN &&
- (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+ return state != TCP_LISTEN &&
+ (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
}
static void unix_write_space(struct sock *sk)
@@ -543,12 +535,12 @@ static void unix_write_space(struct sock *sk)
struct socket_wq *wq;
rcu_read_lock();
- if (unix_writable(sk)) {
+ if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
@@ -572,7 +564,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
sk_error_report(other);
}
}
- other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -619,7 +610,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
u->path.dentry = NULL;
u->path.mnt = NULL;
state = sk->sk_state;
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
skpair = unix_peer(sk);
unix_peer(sk) = NULL;
@@ -640,7 +631,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_lock(skpair);
/* No more writes */
WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
- if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
@@ -733,7 +724,7 @@ static int unix_listen(struct socket *sock, int backlog)
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out; /* Only stream/seqpacket sockets accept */
err = -EINVAL;
- if (!u->addr)
+ if (!READ_ONCE(u->addr))
goto out; /* No listens on an unbound socket */
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
@@ -741,7 +732,8 @@ static int unix_listen(struct socket *sock, int backlog)
if (backlog > sk->sk_max_ack_backlog)
wake_up_interruptible_all(&u->peer_wait);
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = TCP_LISTEN;
+ WRITE_ONCE(sk->sk_state, TCP_LISTEN);
+
/* set credentials so connect can copy them */
init_peercred(sk);
err = 0;
@@ -757,7 +749,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int, bool);
+static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
@@ -978,14 +970,14 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_hash = unix_unbound_hash(sk);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
- sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
+ sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
- u->inflight = 0;
+ u->listener = NULL;
+ u->vertex = NULL;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
@@ -1133,8 +1125,8 @@ static struct sock *unix_find_other(struct net *net,
static int unix_autobind(struct sock *sk)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
u32 lastnum, ordernum;
@@ -1157,6 +1149,7 @@ static int unix_autobind(struct sock *sk)
addr->name->sun_family = AF_UNIX;
refcount_set(&addr->refcnt, 1);
+ old_hash = sk->sk_hash;
ordernum = get_random_u32();
lastnum = ordernum & 0xFFFFF;
retry:
@@ -1197,8 +1190,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
{
umode_t mode = S_IFSOCK |
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct mnt_idmap *idmap;
struct unix_address *addr;
@@ -1236,6 +1229,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
if (u->addr)
goto out_unlock;
+ old_hash = sk->sk_hash;
new_hash = unix_bsd_hash(d_backing_inode(dentry));
unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
@@ -1263,8 +1257,8 @@ out:
static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
int addr_len)
{
- unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ unsigned int new_hash, old_hash;
struct net *net = sock_net(sk);
struct unix_address *addr;
int err;
@@ -1282,6 +1276,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
goto out_mutex;
}
+ old_hash = sk->sk_hash;
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
unix_table_double_lock(net, old_hash, new_hash);
@@ -1371,7 +1366,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
- !unix_sk(sk)->addr) {
+ !READ_ONCE(unix_sk(sk)->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1401,7 +1396,8 @@ restart:
if (err)
goto out_unlock;
- sk->sk_state = other->sk_state = TCP_ESTABLISHED;
+ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
+ WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
} else {
/*
* 1003.1g breaking connected state with AF_UNSPEC
@@ -1418,13 +1414,20 @@ restart:
unix_peer(sk) = other;
if (!other)
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
unix_state_double_unlock(sk, other);
- if (other != old_peer)
+ if (other != old_peer) {
unix_dgram_disconnected(sk, old_peer);
+
+ unix_state_lock(old_peer);
+ if (!unix_peer(old_peer))
+ WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
+ unix_state_unlock(old_peer);
+ }
+
sock_put(old_peer);
} else {
unix_peer(sk) = other;
@@ -1470,9 +1473,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
+ unsigned char state;
long timeo;
int err;
- int st;
err = unix_validate_addr(sunaddr, addr_len);
if (err)
@@ -1483,7 +1486,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1520,7 +1524,6 @@ restart:
goto out;
}
- /* Latch state of peer */
unix_state_lock(other);
/* Apparently VFS overslept socket death. Retry. */
@@ -1536,7 +1539,7 @@ restart:
if (other->sk_shutdown & RCV_SHUTDOWN)
goto out_unlock;
- if (unix_recvq_full(other)) {
+ if (unix_recvq_full_lockless(other)) {
err = -EAGAIN;
if (!timeo)
goto out_unlock;
@@ -1550,39 +1553,21 @@ restart:
goto restart;
}
- /* Latch our state.
-
- It is tricky place. We need to grab our state lock and cannot
- drop lock on peer. It is dangerous because deadlock is
- possible. Connect to self case and simultaneous
- attempt to connect are eliminated by checking socket
- state. other is TCP_LISTEN, if sk is TCP_LISTEN we
- check this before attempt to grab lock.
-
- Well, and we have to recheck the state after socket locked.
+ /* self connect and simultaneous connect are eliminated
+ * by rejecting TCP_LISTEN socket to avoid deadlock.
*/
- st = sk->sk_state;
-
- switch (st) {
- case TCP_CLOSE:
- /* This is ok... continue with connect */
- break;
- case TCP_ESTABLISHED:
- /* Socket is already connected */
- err = -EISCONN;
- goto out_unlock;
- default:
- err = -EINVAL;
+ state = READ_ONCE(sk->sk_state);
+ if (unlikely(state != TCP_CLOSE)) {
+ err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
goto out_unlock;
}
unix_state_lock_nested(sk, U_LOCK_SECOND);
- if (sk->sk_state != st) {
+ if (unlikely(sk->sk_state != TCP_CLOSE)) {
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
unix_state_unlock(sk);
- unix_state_unlock(other);
- sock_put(other);
- goto restart;
+ goto out_unlock;
}
err = security_unix_stream_connect(sk, other, newsk);
@@ -1599,6 +1584,7 @@ restart:
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
+ newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
@@ -1630,7 +1616,7 @@ restart:
copy_peercred(sk, other);
sock->state = SS_CONNECTED;
- sk->sk_state = TCP_ESTABLISHED;
+ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
sock_hold(newsk);
smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
@@ -1690,32 +1676,31 @@ static void unix_sock_inherit_flags(const struct socket *old,
set_bit(SOCK_PASSSEC, &new->flags);
}
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int unix_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
- struct sock *tsk;
struct sk_buff *skb;
- int err;
+ struct sock *tsk;
- err = -EOPNOTSUPP;
+ arg->err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out;
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
+ arg->err = -EINVAL;
+ if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
goto out;
/* If socket state is TCP_LISTEN it cannot change (for now...),
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- &err);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb) {
/* This means receive shutdown. */
- if (err == 0)
- err = -EINVAL;
+ if (arg->err == 0)
+ arg->err = -EINVAL;
goto out;
}
@@ -1725,6 +1710,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
/* attach accepted sock to socket */
unix_state_lock(tsk);
+ unix_update_edges(unix_sk(tsk));
newsock->state = SS_CONNECTED;
unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
@@ -1732,7 +1718,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
return 0;
out:
- return err;
+ return arg->err;
}
@@ -1775,51 +1761,60 @@ out:
return err;
}
+/* The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ UNIXCB(skb).fp = scm->fp;
+ scm->fp = NULL;
+
+ if (unix_prepare_fpl(UNIXCB(skb).fp))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ unix_destroy_fpl(scm->fp);
+}
+
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+}
- /*
- * Garbage collection of unix sockets starts by selecting a set of
- * candidate sockets which have reference only from being in flight
- * (total_refs == inflight_refs). This condition is checked once during
- * the candidate collection phase, and candidates are marked as such, so
- * that non-candidates can later be ignored. While inflight_refs is
- * protected by unix_gc_lock, total_refs (file count) is not, hence this
- * is an instantaneous decision.
- *
- * Once a candidate, however, the socket must not be reinstalled into a
- * file descriptor while the garbage collection is in progress.
- *
- * If the above conditions are met, then the directed graph of
- * candidates (*) does not change while unix_gc_lock is held.
- *
- * Any operations that changes the file count through file descriptors
- * (dup, close, sendmsg) does not change the graph since candidates are
- * not installed in fds.
- *
- * Dequeing a candidate via recvmsg would install it into an fd, but
- * that takes unix_gc_lock to decrement the inflight count, so it's
- * serialized with garbage collection.
- *
- * MSG_PEEK is special in that it does not change the inflight count,
- * yet does install the socket into an fd. The following lock/unlock
- * pair is to ensure serialization with garbage collection. It must be
- * done between incrementing the file count and installing the file into
- * an fd.
- *
- * If garbage collection starts after the barrier provided by the
- * lock/unlock, then it will see the elevated refcount and not mark this
- * as a candidate. If a garbage collection is already in progress
- * before the file count was incremented, then the lock/unlock pair will
- * ensure that garbage collection is finished before progressing to
- * installing the fd.
- *
- * (*) A -> B where B is on the queue of A or B is on the queue of C
- * which is on the queue of listening socket A.
- */
- spin_lock(&unix_gc_lock);
- spin_unlock(&unix_gc_lock);
+static void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1878,8 +1873,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_add(fp->count, &u->scm_stat.nr_fds);
+ unix_add_edges(fp, u);
+ }
}
static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
@@ -1887,8 +1884,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_sub(fp->count, &u->scm_stat.nr_fds);
+ unix_del_edges(fp);
+ }
}
/*
@@ -1908,11 +1907,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
long timeo;
int err;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
@@ -1937,14 +1937,15 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
}
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+ !READ_ONCE(u->addr)) {
err = unix_autobind(sk);
if (err)
goto out;
}
err = -EMSGSIZE;
- if (len > sk->sk_sndbuf - 32)
+ if (len > READ_ONCE(sk->sk_sndbuf) - 32)
goto out;
if (len > SKB_MAX_ALLOC) {
@@ -2026,7 +2027,7 @@ restart_locked:
unix_peer(sk) = NULL;
unix_dgram_peer_wake_disconnect_wakeup(sk, other);
- sk->sk_state = TCP_CLOSE;
+ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -2157,13 +2158,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
maybe_add_creds(skb, sock, other);
skb_get(skb);
+ scm_stat_add(other, skb);
+
+ spin_lock(&other->sk_receive_queue.lock);
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
-
WRITE_ONCE(ousk->oob_skb, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
- scm_stat_add(other, skb);
- skb_queue_tail(&other->sk_receive_queue, skb);
sk_send_sigurg(other);
unix_state_unlock(other);
other->sk_data_ready(other);
@@ -2184,11 +2187,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
bool fds_sent = false;
int data_len;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -2200,7 +2204,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
if (msg->msg_namelen) {
- err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
+ err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out_err;
} else {
err = -ENOTCONN;
@@ -2209,7 +2213,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_err;
}
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto pipe_err;
while (sent < len) {
@@ -2221,7 +2225,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
&err, 0);
} else {
/* Keep two messages in the pipe so it schedules better */
- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+ size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
@@ -2314,7 +2318,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
if (err)
return err;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
return -ENOTCONN;
if (msg->msg_namelen)
@@ -2328,7 +2332,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
return -ENOTCONN;
return unix_dgram_recvmsg(sock, msg, size, flags);
@@ -2553,8 +2557,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_lock(&u->iolock);
unix_state_lock(sk);
+ spin_lock(&sk->sk_receive_queue.lock);
if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
mutex_unlock(&u->iolock);
return -EINVAL;
@@ -2566,6 +2572,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
WRITE_ONCE(u->oob_skb, NULL);
else
skb_get(oob_skb);
+
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
@@ -2589,29 +2597,53 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
{
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
- skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
- skb = NULL;
+ if (!unix_skb_len(skb)) {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ if (copied && (!u->oob_skb || skb == u->oob_skb)) {
+ skb = NULL;
+ } else if (flags & MSG_PEEK) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ } else {
+ unlinked_skb = skb;
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ }
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(unlinked_skb);
} else {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
if (skb == u->oob_skb) {
if (copied) {
skb = NULL;
- } else if (sock_flag(sk, SOCK_URGINLINE)) {
- if (!(flags & MSG_PEEK)) {
+ } else if (!(flags & MSG_PEEK)) {
+ if (sock_flag(sk, SOCK_URGINLINE)) {
WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
+ } else {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ WRITE_ONCE(u->oob_skb, NULL);
+ unlinked_skb = skb;
+ skb = skb_peek(&sk->sk_receive_queue);
}
- } else if (flags & MSG_PEEK) {
- skb = NULL;
- } else {
- skb_unlink(skb, &sk->sk_receive_queue);
- WRITE_ONCE(u->oob_skb, NULL);
- if (!WARN_ON_ONCE(skb_unref(skb)))
- kfree_skb(skb);
- skb = skb_peek(&sk->sk_receive_queue);
+ } else if (!sock_flag(sk, SOCK_URGINLINE)) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
}
}
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ if (unlinked_skb) {
+ WARN_ON_ONCE(skb_unref(unlinked_skb));
+ kfree_skb(unlinked_skb);
+ }
}
return skb;
}
@@ -2619,10 +2651,49 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int err;
+
+ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
return -ENOTCONN;
- return unix_read_skb(sk, recv_actor);
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (unlikely(skb == READ_ONCE(u->oob_skb))) {
+ bool drop = false;
+
+ unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_DEAD)) {
+ unix_state_unlock(sk);
+ kfree_skb(skb);
+ return -ECONNRESET;
+ }
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (likely(skb == u->oob_skb)) {
+ WRITE_ONCE(u->oob_skb, NULL);
+ drop = true;
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ unix_state_unlock(sk);
+
+ if (drop) {
+ WARN_ON_ONCE(skb_unref(skb));
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
+ }
+#endif
+
+ return recv_actor(sk, skb);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
@@ -2643,7 +2714,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
size_t size = state->size;
unsigned int last_len;
- if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
err = -EINVAL;
goto out;
}
@@ -2974,7 +3045,7 @@ long unix_inq_len(struct sock *sk)
struct sk_buff *skb;
long amount = 0;
- if (sk->sk_state == TCP_LISTEN)
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
return -EINVAL;
spin_lock(&sk->sk_receive_queue.lock);
@@ -3059,12 +3130,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
case SIOCATMARK:
{
+ struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int answ = 0;
+ mutex_lock(&u->iolock);
+
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
- answ = 1;
+ if (skb) {
+ struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+
+ if (skb == oob_skb ||
+ (!oob_skb && !unix_skb_len(skb)))
+ answ = 1;
+ }
+
+ mutex_unlock(&u->iolock);
+
err = put_user(answ, (int __user *)arg);
}
break;
@@ -3086,12 +3168,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
+ unsigned char state;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
+ state = READ_ONCE(sk->sk_state);
/* exceptional events? */
if (READ_ONCE(sk->sk_err))
@@ -3113,14 +3197,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
- sk->sk_state == TCP_CLOSE)
+ state == TCP_CLOSE)
mask |= EPOLLHUP;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
- if (unix_writable(sk))
+ if (unix_writable(sk, state))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
@@ -3131,12 +3215,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk, *other;
unsigned int writable;
+ unsigned char state;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
+ state = READ_ONCE(sk->sk_state);
/* exceptional events? */
if (READ_ONCE(sk->sk_err) ||
@@ -3156,19 +3242,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
- if (sk->sk_type == SOCK_SEQPACKET) {
- if (sk->sk_state == TCP_CLOSE)
- mask |= EPOLLHUP;
- /* connection hasn't started yet? */
- if (sk->sk_state == TCP_SYN_SENT)
- return mask;
- }
+ if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
+ mask |= EPOLLHUP;
/* No write status requested, avoid expensive OUT tests. */
if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
- writable = unix_writable(sk);
+ writable = unix_writable(sk, state);
if (writable) {
unix_state_lock(sk);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index be19827eca..937edf4afe 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
u32 *buf;
int i;
- if (sk->sk_state == TCP_LISTEN) {
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
spin_lock(&sk->sk_receive_queue.lock);
attr = nla_reserve(nlskb, UNIX_DIAG_ICONS,
@@ -103,8 +103,8 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
{
struct unix_diag_rqlen rql;
- if (sk->sk_state == TCP_LISTEN) {
- rql.udiag_rqueue = sk->sk_receive_queue.qlen;
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
+ rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue);
rql.udiag_wqueue = sk->sk_max_ack_backlog;
} else {
rql.udiag_rqueue = (u32) unix_inq_len(sk);
@@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
rep = nlmsg_data(nlh);
rep->udiag_family = AF_UNIX;
rep->udiag_type = sk->sk_type;
- rep->udiag_state = sk->sk_state;
+ rep->udiag_state = READ_ONCE(sk->sk_state);
rep->pad = 0;
rep->udiag_ino = sk_ino;
sock_diag_save_cookie(sk, rep->udiag_cookie);
@@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
goto out_nlmsg_trim;
- if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
+ if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown)))
goto out_nlmsg_trim;
if ((req->udiag_show & UDIAG_SHOW_UID) &&
@@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
sk_for_each(sk, &net->unx.table.buckets[slot]) {
if (num < s_num)
goto next;
- if (!(req->udiag_states & (1 << sk->sk_state)))
+ if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state))))
goto next;
if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
NETLINK_CB(cb->skb).portid,
@@ -322,6 +322,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler unix_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_UNIX,
.dump = unix_diag_handler_dump,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a758531e1..23efb78fe9 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -81,278 +81,551 @@
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
+struct unix_sock *unix_get_socket(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops;
+ struct sock *sk = sock->sk;
-/* Internal data structures and random procedures: */
+ ops = READ_ONCE(sock->ops);
-static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+ /* PF_UNIX ? */
+ if (sk && ops && ops->family == PF_UNIX)
+ return unix_sk(sk);
+ }
-static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+ return NULL;
+}
+
+static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
{
- struct sk_buff *skb;
- struct sk_buff *next;
-
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- /* Do we have file descriptors ? */
- if (UNIXCB(skb).fp) {
- bool hit = false;
- /* Process the descriptors of this socket */
- int nfd = UNIXCB(skb).fp->count;
- struct file **fp = UNIXCB(skb).fp->fp;
-
- while (nfd--) {
- /* Get the socket the fd matches if it indeed does so */
- struct sock *sk = unix_get_socket(*fp++);
-
- if (sk) {
- struct unix_sock *u = unix_sk(sk);
-
- /* Ignore non-candidates, they could
- * have been added to the queues after
- * starting the garbage collection
- */
- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
-
- func(u);
- }
- }
- }
- if (hit && hitlist != NULL) {
- __skb_unlink(skb, &x->sk_receive_queue);
- __skb_queue_tail(hitlist, skb);
- }
- }
+ /* If an embryo socket has a fd,
+ * the listener indirectly holds the fd's refcnt.
+ */
+ if (edge->successor->listener)
+ return unix_sk(edge->successor->listener)->vertex;
+
+ return edge->successor->vertex;
+}
+
+static bool unix_graph_maybe_cyclic;
+static bool unix_graph_grouped;
+
+static void unix_update_graph(struct unix_vertex *vertex)
+{
+ /* If the receiver socket is not inflight, no cyclic
+ * reference could be formed.
+ */
+ if (!vertex)
+ return;
+
+ unix_graph_maybe_cyclic = true;
+ unix_graph_grouped = false;
+}
+
+static LIST_HEAD(unix_unvisited_vertices);
+
+enum unix_vertex_index {
+ UNIX_VERTEX_INDEX_MARK1,
+ UNIX_VERTEX_INDEX_MARK2,
+ UNIX_VERTEX_INDEX_START,
+};
+
+static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;
+
+static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+ struct unix_vertex *vertex = edge->predecessor->vertex;
+
+ if (!vertex) {
+ vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
+ vertex->index = unix_vertex_unvisited_index;
+ vertex->out_degree = 0;
+ INIT_LIST_HEAD(&vertex->edges);
+ INIT_LIST_HEAD(&vertex->scc_entry);
+
+ list_move_tail(&vertex->entry, &unix_unvisited_vertices);
+ edge->predecessor->vertex = vertex;
}
- spin_unlock(&x->sk_receive_queue.lock);
+
+ vertex->out_degree++;
+ list_add_tail(&edge->vertex_entry, &vertex->edges);
+
+ unix_update_graph(unix_edge_successor(edge));
}
-static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
- if (x->sk_state != TCP_LISTEN) {
- scan_inflight(x, func, hitlist);
- } else {
- struct sk_buff *skb;
- struct sk_buff *next;
- struct unix_sock *u;
- LIST_HEAD(embryos);
+ struct unix_vertex *vertex = edge->predecessor->vertex;
- /* For a listening socket collect the queued embryos
- * and perform a scan on them as well.
- */
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- u = unix_sk(skb->sk);
+ if (!fpl->dead)
+ unix_update_graph(unix_edge_successor(edge));
- /* An embryo cannot be in-flight, so it's safe
- * to use the list link.
- */
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &embryos);
- }
- spin_unlock(&x->sk_receive_queue.lock);
+ list_del(&edge->vertex_entry);
+ vertex->out_degree--;
- while (!list_empty(&embryos)) {
- u = list_entry(embryos.next, struct unix_sock, link);
- scan_inflight(&u->sk, func, hitlist);
- list_del_init(&u->link);
- }
+ if (!vertex->out_degree) {
+ edge->predecessor->vertex = NULL;
+ list_move_tail(&vertex->entry, &fpl->vertices);
}
}
-static void dec_inflight(struct unix_sock *usk)
+static void unix_free_vertices(struct scm_fp_list *fpl)
{
- usk->inflight--;
+ struct unix_vertex *vertex, *next_vertex;
+
+ list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
+ list_del(&vertex->entry);
+ kfree(vertex);
+ }
}
-static void inc_inflight(struct unix_sock *usk)
+static DEFINE_SPINLOCK(unix_gc_lock);
+unsigned int unix_tot_inflight;
+
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
- usk->inflight++;
+ int i = 0, j = 0;
+
+ spin_lock(&unix_gc_lock);
+
+ if (!fpl->count_unix)
+ goto out;
+
+ do {
+ struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
+ struct unix_edge *edge;
+
+ if (!inflight)
+ continue;
+
+ edge = fpl->edges + i++;
+ edge->predecessor = inflight;
+ edge->successor = receiver;
+
+ unix_add_edge(fpl, edge);
+ } while (i < fpl->count_unix);
+
+ receiver->scm_stat.nr_unix_fds += fpl->count_unix;
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
+
+ spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = true;
+
+ unix_free_vertices(fpl);
}
-static void inc_inflight_move_tail(struct unix_sock *u)
+void unix_del_edges(struct scm_fp_list *fpl)
{
- u->inflight++;
+ struct unix_sock *receiver;
+ int i = 0;
+
+ spin_lock(&unix_gc_lock);
- /* If this still might be part of a cycle, move it to the end
- * of the list, so that it's checked even if it was already
- * passed over
+ if (!fpl->count_unix)
+ goto out;
+
+ do {
+ struct unix_edge *edge = fpl->edges + i++;
+
+ unix_del_edge(fpl, edge);
+ } while (i < fpl->count_unix);
+
+ if (!fpl->dead) {
+ receiver = fpl->edges[0].successor;
+ receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
+ }
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
+
+ spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = false;
+}
+
+void unix_update_edges(struct unix_sock *receiver)
+{
+ /* nr_unix_fds is only updated under unix_state_lock().
+ * If it's 0 here, the embryo socket is not part of the
+ * inflight graph, and GC will not see it, so no lock needed.
*/
- if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
- list_move_tail(&u->link, &gc_candidates);
+ if (!receiver->scm_stat.nr_unix_fds) {
+ receiver->listener = NULL;
+ } else {
+ spin_lock(&unix_gc_lock);
+ unix_update_graph(unix_sk(receiver->listener)->vertex);
+ receiver->listener = NULL;
+ spin_unlock(&unix_gc_lock);
+ }
}
-static bool gc_in_progress;
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
+int unix_prepare_fpl(struct scm_fp_list *fpl)
+{
+ struct unix_vertex *vertex;
+ int i;
+
+ if (!fpl->count_unix)
+ return 0;
+
+ for (i = 0; i < fpl->count_unix; i++) {
+ vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
+ if (!vertex)
+ goto err;
+
+ list_add(&vertex->entry, &fpl->vertices);
+ }
+
+ fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
+ GFP_KERNEL_ACCOUNT);
+ if (!fpl->edges)
+ goto err;
+
+ return 0;
-void wait_for_unix_gc(void)
+err:
+ unix_free_vertices(fpl);
+ return -ENOMEM;
+}
+
+void unix_destroy_fpl(struct scm_fp_list *fpl)
{
- /* If number of inflight sockets is insane,
- * force a garbage collect right now.
- * Paired with the WRITE_ONCE() in unix_inflight(),
- * unix_notinflight() and gc_in_progress().
- */
- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
- !READ_ONCE(gc_in_progress))
- unix_gc();
- wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress));
+ if (fpl->inflight)
+ unix_del_edges(fpl);
+
+ kvfree(fpl->edges);
+ unix_free_vertices(fpl);
}
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static bool unix_vertex_dead(struct unix_vertex *vertex)
{
- struct sk_buff *next_skb, *skb;
+ struct unix_edge *edge;
struct unix_sock *u;
- struct unix_sock *next;
- struct sk_buff_head hitlist;
- struct list_head cursor;
- LIST_HEAD(not_cycle_list);
+ long total_ref;
- spin_lock(&unix_gc_lock);
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
- /* Avoid a recursive GC. */
- if (gc_in_progress)
- goto out;
+ /* The vertex's fd can be received by a non-inflight socket. */
+ if (!next_vertex)
+ return false;
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, true);
+ /* The vertex's fd can be received by an inflight socket in
+ * another SCC.
+ */
+ if (next_vertex->scc_index != vertex->scc_index)
+ return false;
+ }
- /* First, select candidates for garbage collection. Only
- * in-flight sockets are considered, and from those only ones
- * which don't have any external reference.
- *
- * Holding unix_gc_lock will protect these candidates from
- * being detached, and hence from gaining an external
- * reference. Since there are no possible receivers, all
- * buffers currently on the candidates' queues stay there
- * during the garbage collection.
- *
- * We also know that no new candidate can be added onto the
- * receive queues. Other, non candidate sockets _can_ be
- * added to queue, so we must make sure only to touch
- * candidates.
- *
- * Embryos, though never candidates themselves, affect which
- * candidates are reachable by the garbage collector. Before
- * being added to a listener's queue, an embryo may already
- * receive data carrying SCM_RIGHTS, potentially making the
- * passed socket a candidate that is not yet reachable by the
- * collector. It becomes reachable once the embryo is
- * enqueued. Therefore, we must ensure that no SCM-laden
- * embryo appears in a (candidate) listener's queue between
- * consecutive scan_children() calls.
- */
- list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
- struct sock *sk = &u->sk;
- long total_refs;
-
- total_refs = file_count(sk->sk_socket->file);
-
- BUG_ON(!u->inflight);
- BUG_ON(total_refs < u->inflight);
- if (total_refs == u->inflight) {
- list_move_tail(&u->link, &gc_candidates);
- __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-
- if (sk->sk_state == TCP_LISTEN) {
- unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
- unix_state_unlock(sk);
+ /* No receiver exists outside the same SCC. */
+
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ total_ref = file_count(u->sk.sk_socket->file);
+
+ /* If not close()d, total_ref > out_degree. */
+ if (total_ref != vertex->out_degree)
+ return false;
+
+ return true;
+}
+
+enum unix_recv_queue_lock_class {
+ U_RECVQ_LOCK_NORMAL,
+ U_RECVQ_LOCK_EMBRYO,
+};
+
+static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
+{
+ skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ WARN_ON_ONCE(skb_unref(u->oob_skb));
+ u->oob_skb = NULL;
+ }
+#endif
+}
+
+static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
+{
+ struct unix_vertex *vertex;
+
+ list_for_each_entry_reverse(vertex, scc, scc_entry) {
+ struct sk_buff_head *queue;
+ struct unix_edge *edge;
+ struct unix_sock *u;
+
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ queue = &u->sk.sk_receive_queue;
+
+ spin_lock(&queue->lock);
+
+ if (u->sk.sk_state == TCP_LISTEN) {
+ struct sk_buff *skb;
+
+ skb_queue_walk(queue, skb) {
+ struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
+
+ /* listener -> embryo order, the inversion never happens. */
+ spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
+ unix_collect_queue(unix_sk(skb->sk), hitlist);
+ spin_unlock(&embryo_queue->lock);
}
+ } else {
+ unix_collect_queue(u, hitlist);
}
+
+ spin_unlock(&queue->lock);
}
+}
- /* Now remove all internal in-flight reference to children of
- * the candidates.
- */
- list_for_each_entry(u, &gc_candidates, link)
- scan_children(&u->sk, dec_inflight, NULL);
+static bool unix_scc_cyclic(struct list_head *scc)
+{
+ struct unix_vertex *vertex;
+ struct unix_edge *edge;
- /* Restore the references for children of all candidates,
- * which have remaining references. Do this recursively, so
- * only those remain, which form cyclic references.
- *
- * Use a "cursor" link, to make the list traversal safe, even
- * though elements might be moved about.
+ /* SCC containing multiple vertices ? */
+ if (!list_is_singular(scc))
+ return true;
+
+ vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
+
+ /* Self-reference or an embryo-listener cycle ? */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ if (unix_edge_successor(edge) == vertex)
+ return true;
+ }
+
+ return false;
+}
+
+static LIST_HEAD(unix_visited_vertices);
+static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
+
+static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
+ struct sk_buff_head *hitlist)
+{
+ LIST_HEAD(vertex_stack);
+ struct unix_edge *edge;
+ LIST_HEAD(edge_stack);
+
+next_vertex:
+ /* Push vertex to vertex_stack and mark it as on-stack
+ * (index >= UNIX_VERTEX_INDEX_START).
+ * The vertex will be popped when finalising SCC later.
*/
- list_add(&cursor, &gc_candidates);
- while (cursor.next != &gc_candidates) {
- u = list_entry(cursor.next, struct unix_sock, link);
+ list_add(&vertex->scc_entry, &vertex_stack);
+
+ vertex->index = *last_index;
+ vertex->scc_index = *last_index;
+ (*last_index)++;
+
+ /* Explore neighbour vertices (receivers of the current vertex's fd). */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+ if (!next_vertex)
+ continue;
+
+ if (next_vertex->index == unix_vertex_unvisited_index) {
+ /* Iterative (non-recursive) depth first search
+ *
+ * 1. Push a forward edge to edge_stack and set
+ * the successor to vertex for the next iteration.
+ */
+ list_add(&edge->stack_entry, &edge_stack);
- /* Move cursor to after the current position. */
- list_move(&cursor, &u->link);
+ vertex = next_vertex;
+ goto next_vertex;
- if (u->inflight) {
- list_move_tail(&u->link, &not_cycle_list);
- __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
- scan_children(&u->sk, inc_inflight_move_tail, NULL);
+ /* 2. Pop the edge directed to the current vertex
+ * and restore the ancestor for backtracking.
+ */
+prev_vertex:
+ edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
+ list_del_init(&edge->stack_entry);
+
+ next_vertex = vertex;
+ vertex = edge->predecessor->vertex;
+
+ /* If the successor has a smaller scc_index, two vertices
+ * are in the same SCC, so propagate the smaller scc_index
+ * to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else if (next_vertex->index != unix_vertex_grouped_index) {
+ /* Loop detected by a back/cross edge.
+ *
+ * The successor is on vertex_stack, so two vertices are in
+ * the same SCC. If the successor has a smaller *scc_index*,
+ * propagate it to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else {
+ /* The successor was already grouped as another SCC */
}
}
- list_del(&cursor);
- /* Now gc_candidates contains only garbage. Restore original
- * inflight counters for these as well, and remove the skbuffs
- * which are creating the cycle(s).
- */
- skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link) {
- scan_children(&u->sk, inc_inflight, &hitlist);
+ if (vertex->index == vertex->scc_index) {
+ struct unix_vertex *v;
+ struct list_head scc;
+ bool scc_dead = true;
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
+ /* SCC finalised.
+ *
+ * If the scc_index was not updated, all the vertices above on
+ * vertex_stack are in the same SCC. Group them using scc_entry.
+ */
+ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(v, &scc, scc_entry) {
+ /* Don't restart DFS from this vertex in unix_walk_scc(). */
+ list_move_tail(&v->entry, &unix_visited_vertices);
+
+ /* Mark vertex as off-stack. */
+ v->index = unix_vertex_grouped_index;
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(v);
}
-#endif
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
}
- /* not_cycle_list contains those sockets which do not make up a
- * cycle. Restore these to the inflight list.
+ /* Need backtracking ? */
+ if (!list_empty(&edge_stack))
+ goto prev_vertex;
+}
+
+static void unix_walk_scc(struct sk_buff_head *hitlist)
+{
+ unsigned long last_index = UNIX_VERTEX_INDEX_START;
+
+ unix_graph_maybe_cyclic = false;
+
+ /* Visit every vertex exactly once.
+ * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
*/
- while (!list_empty(&not_cycle_list)) {
- u = list_entry(not_cycle_list.next, struct unix_sock, link);
- __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- list_move_tail(&u->link, &gc_inflight_list);
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ __unix_walk_scc(vertex, &last_index, hitlist);
}
- spin_unlock(&unix_gc_lock);
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+ swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
- /* We need io_uring to clean its registered files, ignore all io_uring
- * originated skbs. It's fine as io_uring doesn't keep references to
- * other io_uring instances and so killing all other files in the cycle
- * will put all io_uring references forcing it to go through normal
- * release.path eventually putting registered files.
- */
- skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->destructor == io_uring_destruct_scm) {
- __skb_unlink(skb, &hitlist);
- skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ unix_graph_grouped = true;
+}
+
+static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
+{
+ unix_graph_maybe_cyclic = false;
+
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+ struct list_head scc;
+ bool scc_dead = true;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ list_add(&scc, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+ list_move_tail(&vertex->entry, &unix_visited_vertices);
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(vertex);
}
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
}
- /* Here we are. Hitlist is filled. Die. */
- __skb_queue_purge(&hitlist);
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+}
+
+static bool gc_in_progress;
+
+static void __unix_gc(struct work_struct *work)
+{
+ struct sk_buff_head hitlist;
+ struct sk_buff *skb;
spin_lock(&unix_gc_lock);
- /* There could be io_uring registered files, just push them back to
- * the inflight list
- */
- list_for_each_entry_safe(u, next, &gc_candidates, link)
- list_move_tail(&u->link, &gc_inflight_list);
+ if (!unix_graph_maybe_cyclic) {
+ spin_unlock(&unix_gc_lock);
+ goto skip_gc;
+ }
+
+ __skb_queue_head_init(&hitlist);
+
+ if (unix_graph_grouped)
+ unix_walk_scc_fast(&hitlist);
+ else
+ unix_walk_scc(&hitlist);
- /* All candidates should have been detached by now. */
- BUG_ON(!list_empty(&gc_candidates));
+ spin_unlock(&unix_gc_lock);
+
+ skb_queue_walk(&hitlist, skb) {
+ if (UNIXCB(skb).fp)
+ UNIXCB(skb).fp->dead = true;
+ }
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ __skb_queue_purge(&hitlist);
+skip_gc:
WRITE_ONCE(gc_in_progress, false);
+}
- wake_up(&unix_gc_wait);
+static DECLARE_WORK(unix_gc_work, __unix_gc);
- out:
- spin_unlock(&unix_gc_lock);
+void unix_gc(void)
+{
+ WRITE_ONCE(gc_in_progress, true);
+ queue_work(system_unbound_wq, &unix_gc_work);
+}
+
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+
+void wait_for_unix_gc(struct scm_fp_list *fpl)
+{
+ /* If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ *
+ * Paired with the WRITE_ONCE() in unix_add_edges(),
+ * unix_del_edges(), unix_gc(), and __unix_gc().
+ */
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
+ unix_gc();
+
+ /* Penalise users who want to send AF_UNIX sockets
+ * but whose sockets have not been received yet.
+ */
+ if (!fpl || !fpl->count_unix ||
+ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
+ return;
+
+ if (READ_ONCE(gc_in_progress))
+ flush_work(&unix_gc_work);
}
diff --git a/net/unix/scm.c b/net/unix/scm.c
deleted file mode 100644
index e92f2fad64..0000000000
--- a/net/unix/scm.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <linux/fs.h>
-#include <net/af_unix.h>
-#include <net/scm.h>
-#include <linux/init.h>
-#include <linux/io_uring.h>
-
-#include "scm.h"
-
-unsigned int unix_tot_inflight;
-EXPORT_SYMBOL(unix_tot_inflight);
-
-LIST_HEAD(gc_inflight_list);
-EXPORT_SYMBOL(gc_inflight_list);
-
-DEFINE_SPINLOCK(unix_gc_lock);
-EXPORT_SYMBOL(unix_gc_lock);
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- const struct proto_ops *ops = READ_ONCE(sock->ops);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && ops && ops->family == PF_UNIX)
- u_sock = s;
- }
-
- return u_sock;
-}
-EXPORT_SYMBOL(unix_get_socket);
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (!u->inflight) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- u->inflight++;
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!u->inflight);
- BUG_ON(list_empty(&u->link));
-
- u->inflight--;
- if (!u->inflight)
- list_del_init(&u->link);
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
- spin_unlock(&unix_gc_lock);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- /*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
-}
-EXPORT_SYMBOL(unix_attach_fds);
-
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-EXPORT_SYMBOL(unix_detach_fds);
-
-void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
-
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-EXPORT_SYMBOL(unix_destruct_scm);
-
-void io_uring_destruct_scm(struct sk_buff *skb)
-{
- unix_destruct_scm(skb);
-}
-EXPORT_SYMBOL(io_uring_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
deleted file mode 100644
index 5a255a477f..0000000000
--- a/net/unix/scm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef NET_UNIX_SCM_H
-#define NET_UNIX_SCM_H
-
-extern struct list_head gc_inflight_list;
-extern spinlock_t unix_gc_lock;
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-
-#endif
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 3e84b31c35..357b3e5f38 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -19,7 +19,6 @@ static struct ctl_table unix_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- { }
};
int __net_init unix_sysctl_register(struct net *net)
@@ -52,7 +51,7 @@ err_alloc:
void unix_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->unx.ctl->ctl_table_arg;
unregister_net_sysctl_table(net->unx.ctl);
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index bd84785bf8..bca2d86ba9 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -54,6 +54,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
struct sk_psock *psock;
int copied;
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
if (!len)
return 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 54ba7316f8..4b040285aa 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1500,8 +1500,8 @@ out:
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int vsock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *listener;
int err;
@@ -1528,7 +1528,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
index 2e29994f92..ab87ef66c1 100644
--- a/net/vmw_vsock/diag.c
+++ b/net/vmw_vsock/diag.c
@@ -157,6 +157,7 @@ static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler vsock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_VSOCK,
.dump = vsock_diag_handler_dump,
};
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ee5d306a96..43d4052988 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -859,7 +859,6 @@ static struct virtio_driver virtio_vsock_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 72074fd36d..1d49cc8b6d 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex))
+$(obj)/shipped-certs.c: $(sort $(wildcard $(src)/certs/*.hex))
@$(kecho) " GEN $@"
$(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index ceb9174c5c..3414b2c3ab 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2023 Intel Corporation
+ * Copyright 2018-2024 Intel Corporation
*/
#include <linux/export.h>
@@ -27,11 +27,10 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
if (WARN_ON(!chan))
return;
- chandef->chan = chan;
- chandef->freq1_offset = chan->freq_offset;
- chandef->center_freq2 = 0;
- chandef->edmg.bw_config = 0;
- chandef->edmg.channels = 0;
+ *chandef = (struct cfg80211_chan_def) {
+ .chan = chan,
+ .freq1_offset = chan->freq_offset,
+ };
switch (chan_type) {
case NL80211_CHAN_NO_HT:
@@ -56,6 +55,73 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
}
EXPORT_SYMBOL(cfg80211_chandef_create);
+struct cfg80211_per_bw_puncturing_values {
+ u8 len;
+ const u16 *valid_values;
+};
+
+static const u16 puncturing_values_80mhz[] = {
+ 0x8, 0x4, 0x2, 0x1
+};
+
+static const u16 puncturing_values_160mhz[] = {
+ 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3
+};
+
+static const u16 puncturing_values_320mhz[] = {
+ 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00,
+ 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f,
+ 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f
+};
+
+#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \
+ { \
+ .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \
+ .valid_values = puncturing_values_ ## _bw ## mhz \
+ }
+
+static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = {
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320)
+};
+
+static bool valid_puncturing_bitmap(const struct cfg80211_chan_def *chandef)
+{
+ u32 idx, i, start_freq, primary_center = chandef->chan->center_freq;
+
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_80:
+ idx = 0;
+ start_freq = chandef->center_freq1 - 40;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ idx = 1;
+ start_freq = chandef->center_freq1 - 80;
+ break;
+ case NL80211_CHAN_WIDTH_320:
+ idx = 2;
+ start_freq = chandef->center_freq1 - 160;
+ break;
+ default:
+ return chandef->punctured == 0;
+ }
+
+ if (!chandef->punctured)
+ return true;
+
+ /* check if primary channel is punctured */
+ if (chandef->punctured & (u16)BIT((primary_center - start_freq) / 20))
+ return false;
+
+ for (i = 0; i < per_bw_puncturing[idx].len; i++) {
+ if (per_bw_puncturing[idx].valid_values[i] == chandef->punctured)
+ return true;
+ }
+
+ return false;
+}
+
static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
{
int max_contiguous = 0;
@@ -317,72 +383,81 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
!cfg80211_edmg_chandef_valid(chandef))
return false;
- return true;
+ return valid_puncturing_bitmap(chandef);
}
EXPORT_SYMBOL(cfg80211_chandef_valid);
-static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
- u32 *pri40, u32 *pri80, u32 *pri160)
+int cfg80211_chandef_primary(const struct cfg80211_chan_def *c,
+ enum nl80211_chan_width primary_chan_width,
+ u16 *punctured)
{
- int tmp;
+ int pri_width = nl80211_chan_width_to_mhz(primary_chan_width);
+ int width = cfg80211_chandef_get_width(c);
+ u32 control = c->chan->center_freq;
+ u32 center = c->center_freq1;
+ u16 _punct = 0;
- switch (c->width) {
- case NL80211_CHAN_WIDTH_40:
- *pri40 = c->center_freq1;
- *pri80 = 0;
- *pri160 = 0;
- break;
- case NL80211_CHAN_WIDTH_80:
- case NL80211_CHAN_WIDTH_80P80:
- *pri160 = 0;
- *pri80 = c->center_freq1;
- /* n_P20 */
- tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 20 + 40 * tmp;
- break;
- case NL80211_CHAN_WIDTH_160:
- *pri160 = c->center_freq1;
- /* n_P20 */
- tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 60 + 40 * tmp;
- /* n_P80 */
- tmp /= 2;
- *pri80 = c->center_freq1 - 40 + 80 * tmp;
- break;
- case NL80211_CHAN_WIDTH_320:
- /* n_P20 */
- tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- *pri40 = c->center_freq1 - 140 + 40 * tmp;
- /* n_P80 */
- tmp /= 2;
- *pri80 = c->center_freq1 - 120 + 80 * tmp;
- /* n_P160 */
- tmp /= 2;
- *pri160 = c->center_freq1 - 80 + 160 * tmp;
- break;
- default:
- WARN_ON_ONCE(1);
+ if (WARN_ON_ONCE(pri_width < 0 || width < 0))
+ return -1;
+
+ /* not intended to be called this way, can't determine */
+ if (WARN_ON_ONCE(pri_width > width))
+ return -1;
+
+ if (!punctured)
+ punctured = &_punct;
+
+ *punctured = c->punctured;
+
+ while (width > pri_width) {
+ unsigned int bits_to_drop = width / 20 / 2;
+
+ if (control > center) {
+ center += width / 4;
+ *punctured >>= bits_to_drop;
+ } else {
+ center -= width / 4;
+ *punctured &= (1 << bits_to_drop) - 1;
+ }
+ width /= 2;
}
+
+ return center;
}
+EXPORT_SYMBOL(cfg80211_chandef_primary);
-const struct cfg80211_chan_def *
-cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
- const struct cfg80211_chan_def *c2)
+static const struct cfg80211_chan_def *
+check_chandef_primary_compat(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2,
+ enum nl80211_chan_width primary_chan_width)
{
- u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80, c1_pri160, c2_pri160;
+ u16 punct_c1 = 0, punct_c2 = 0;
+
+ /* check primary is compatible -> error if not */
+ if (cfg80211_chandef_primary(c1, primary_chan_width, &punct_c1) !=
+ cfg80211_chandef_primary(c2, primary_chan_width, &punct_c2))
+ return ERR_PTR(-EINVAL);
+
+ if (punct_c1 != punct_c2)
+ return ERR_PTR(-EINVAL);
+
+ /* assumes c1 is smaller width, if that was just checked -> done */
+ if (c1->width == primary_chan_width)
+ return c2;
+
+ /* otherwise continue checking the next width */
+ return NULL;
+}
+
+static const struct cfg80211_chan_def *
+_cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2)
+{
+ const struct cfg80211_chan_def *ret;
/* If they are identical, return */
if (cfg80211_chandef_identical(c1, c2))
- return c1;
+ return c2;
/* otherwise, must have same control channel */
if (c1->chan != c2->chan)
@@ -396,53 +471,76 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
return NULL;
/*
- * can't be compatible if one of them is 5 or 10 MHz,
+ * can't be compatible if one of them is 5/10 MHz or S1G
* but they don't have the same width.
*/
- if (c1->width == NL80211_CHAN_WIDTH_5 ||
- c1->width == NL80211_CHAN_WIDTH_10 ||
- c2->width == NL80211_CHAN_WIDTH_5 ||
- c2->width == NL80211_CHAN_WIDTH_10)
+#define NARROW_OR_S1G(width) ((width) == NL80211_CHAN_WIDTH_5 || \
+ (width) == NL80211_CHAN_WIDTH_10 || \
+ (width) == NL80211_CHAN_WIDTH_1 || \
+ (width) == NL80211_CHAN_WIDTH_2 || \
+ (width) == NL80211_CHAN_WIDTH_4 || \
+ (width) == NL80211_CHAN_WIDTH_8 || \
+ (width) == NL80211_CHAN_WIDTH_16)
+
+ if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width))
return NULL;
- if (c1->width == NL80211_CHAN_WIDTH_20_NOHT ||
- c1->width == NL80211_CHAN_WIDTH_20)
+ /*
+ * Make sure that c1 is always the narrower one, so that later
+ * we either return NULL or c2 and don't have to check both
+ * directions.
+ */
+ if (c1->width > c2->width)
+ swap(c1, c2);
+
+ /*
+ * No further checks needed if the "narrower" one is only 20 MHz.
+ * Here "narrower" includes being a 20 MHz non-HT channel vs. a
+ * 20 MHz HT (or later) one.
+ */
+ if (c1->width <= NL80211_CHAN_WIDTH_20)
return c2;
- if (c2->width == NL80211_CHAN_WIDTH_20_NOHT ||
- c2->width == NL80211_CHAN_WIDTH_20)
- return c1;
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_40);
+ if (ret)
+ return ret;
- chandef_primary_freqs(c1, &c1_pri40, &c1_pri80, &c1_pri160);
- chandef_primary_freqs(c2, &c2_pri40, &c2_pri80, &c2_pri160);
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_80);
+ if (ret)
+ return ret;
- if (c1_pri40 != c2_pri40)
+ /*
+ * If c1 is 80+80, then c2 is 160 or higher, but that cannot
+ * match. If c2 was also 80+80 it was already either accepted
+ * or rejected above (identical or not, respectively.)
+ */
+ if (c1->width == NL80211_CHAN_WIDTH_80P80)
return NULL;
- if (c1->width == NL80211_CHAN_WIDTH_40)
- return c2;
-
- if (c2->width == NL80211_CHAN_WIDTH_40)
- return c1;
+ ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_160);
+ if (ret)
+ return ret;
- if (c1_pri80 != c2_pri80)
- return NULL;
+ /*
+ * Getting here would mean they're both wider than 160, have the
+ * same primary 160, but are not identical - this cannot happen
+ * since they must be 320 (no wider chandefs exist, at least yet.)
+ */
+ WARN_ON_ONCE(1);
- if (c1->width == NL80211_CHAN_WIDTH_80 &&
- c2->width > NL80211_CHAN_WIDTH_80)
- return c2;
+ return NULL;
+}
- if (c2->width == NL80211_CHAN_WIDTH_80 &&
- c1->width > NL80211_CHAN_WIDTH_80)
- return c1;
+const struct cfg80211_chan_def *
+cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
+ const struct cfg80211_chan_def *c2)
+{
+ const struct cfg80211_chan_def *ret;
- WARN_ON(!c1_pri160 && !c2_pri160);
- if (c1_pri160 && c2_pri160 && c1_pri160 != c2_pri160)
+ ret = _cfg80211_chandef_compatible(c1, c2);
+ if (IS_ERR(ret))
return NULL;
-
- if (c1->width > c2->width)
- return c1;
- return c2;
+ return ret;
}
EXPORT_SYMBOL(cfg80211_chandef_compatible);
@@ -1047,7 +1145,7 @@ EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time);
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
- u32 prohibited_flags)
+ u32 prohibited_flags, bool monitor)
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
@@ -1057,7 +1155,11 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
c = ieee80211_get_channel_khz(wiphy, freq);
- if (!c || c->flags & prohibited_flags)
+ if (!c)
+ return false;
+ if (monitor && c->flags & IEEE80211_CHAN_CAN_MONITOR)
+ continue;
+ if (c->flags & prohibited_flags)
return false;
}
@@ -1117,9 +1219,9 @@ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
return true;
}
-bool cfg80211_chandef_usable(struct wiphy *wiphy,
- const struct cfg80211_chan_def *chandef,
- u32 prohibited_flags)
+bool _cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags, bool monitor)
{
struct ieee80211_sta_ht_cap *ht_cap;
struct ieee80211_sta_vht_cap *vht_cap;
@@ -1281,14 +1383,22 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
if (!cfg80211_secondary_chans_ok(wiphy,
ieee80211_chandef_to_khz(chandef),
- width, prohibited_flags))
+ width, prohibited_flags, monitor))
return false;
if (!chandef->center_freq2)
return true;
return cfg80211_secondary_chans_ok(wiphy,
MHZ_TO_KHZ(chandef->center_freq2),
- width, prohibited_flags);
+ width, prohibited_flags, monitor);
+}
+
+bool cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags)
+{
+ return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags,
+ false);
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
@@ -1532,72 +1642,3 @@ struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
}
}
EXPORT_SYMBOL(wdev_chandef);
-
-struct cfg80211_per_bw_puncturing_values {
- u8 len;
- const u16 *valid_values;
-};
-
-static const u16 puncturing_values_80mhz[] = {
- 0x8, 0x4, 0x2, 0x1
-};
-
-static const u16 puncturing_values_160mhz[] = {
- 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3
-};
-
-static const u16 puncturing_values_320mhz[] = {
- 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00,
- 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f,
- 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f
-};
-
-#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \
- { \
- .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \
- .valid_values = puncturing_values_ ## _bw ## mhz \
- }
-
-static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = {
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80),
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160),
- CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320)
-};
-
-bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
- const struct cfg80211_chan_def *chandef)
-{
- u32 idx, i, start_freq;
-
- switch (chandef->width) {
- case NL80211_CHAN_WIDTH_80:
- idx = 0;
- start_freq = chandef->center_freq1 - 40;
- break;
- case NL80211_CHAN_WIDTH_160:
- idx = 1;
- start_freq = chandef->center_freq1 - 80;
- break;
- case NL80211_CHAN_WIDTH_320:
- idx = 2;
- start_freq = chandef->center_freq1 - 160;
- break;
- default:
- *bitmap = 0;
- break;
- }
-
- if (!*bitmap)
- return true;
-
- /* check if primary channel is punctured */
- if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20))
- return false;
-
- for (i = 0; i < per_bw_puncturing[idx].len; i++)
- if (per_bw_puncturing[idx].valid_values[i] == *bitmap)
- return true;
-
- return false;
-}
-EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3fb1b63735..4b1f45e307 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work)
if (wk) {
list_del_init(&wk->entry);
if (!list_empty(&rdev->wiphy_work_list))
- schedule_work(work);
+ queue_work(system_unbound_wq, work);
spin_unlock_irq(&rdev->wiphy_work_lock);
wk->func(&rdev->wiphy, wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 13657a85cf..118f2f6198 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -362,7 +362,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
struct cfg80211_auth_request *req);
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct cfg80211_assoc_request *req);
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack);
int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
struct net_device *dev, const u8 *bssid,
const u8 *ie, int ie_len, u16 reason,
@@ -491,6 +492,9 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev,
struct ieee80211_channel *chan,
bool primary_only);
+bool _cfg80211_chandef_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef,
+ u32 prohibited_flags, bool monitor);
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
@@ -549,9 +553,53 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask);
+/**
+ * struct cfg80211_colocated_ap - colocated AP information
+ *
+ * @list: linked list to all colocated APs
+ * @bssid: BSSID of the reported AP
+ * @ssid: SSID of the reported AP
+ * @ssid_len: length of the ssid
+ * @center_freq: frequency the reported AP is on
+ * @unsolicited_probe: the reported AP is part of an ESS, where all the APs
+ * that operate in the same channel as the reported AP and that might be
+ * detected by a STA receiving this frame, are transmitting unsolicited
+ * Probe Response frames every 20 TUs
+ * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP
+ * @same_ssid: the reported AP has the same SSID as the reporting AP
+ * @multi_bss: the reported AP is part of a multiple BSSID set
+ * @transmitted_bssid: the reported AP is the transmitting BSSID
+ * @colocated_ess: all the APs that share the same ESS as the reported AP are
+ * colocated and can be discovered via legacy bands.
+ * @short_ssid_valid: short_ssid is valid and can be used
+ * @short_ssid: the short SSID for this SSID
+ * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP
+ */
+struct cfg80211_colocated_ap {
+ struct list_head list;
+ u8 bssid[ETH_ALEN];
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ size_t ssid_len;
+ u32 short_ssid;
+ u32 center_freq;
+ u8 unsolicited_probe:1,
+ oct_recommended:1,
+ same_ssid:1,
+ multi_bss:1,
+ transmitted_bssid:1,
+ colocated_ess:1,
+ short_ssid_valid:1;
+ s8 psd_20;
+};
+
#if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST)
#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
#define VISIBLE_IF_CFG80211_KUNIT
+void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list);
+
+int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+ struct list_head *list);
+
size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
const u8 *subie, size_t subie_len,
u8 *new_ie, size_t new_ie_len);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index f635a8b6ca..4052041a19 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -4,7 +4,7 @@
*
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
* Copyright (c) 2015 Intel Deutschland GmbH
- * Copyright (C) 2019-2020, 2022-2023 Intel Corporation
+ * Copyright (C) 2019-2020, 2022-2024 Intel Corporation
*/
#include <linux/kernel.h>
@@ -241,12 +241,12 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
char *buf = kmalloc(128, gfp);
if (buf) {
- sprintf(buf, "MLME-MICHAELMICFAILURE.indication("
- "keyid=%d %scast addr=%pM)", key_id,
- key_type == NL80211_KEYTYPE_GROUP ? "broad" : "uni",
- addr);
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = strlen(buf);
+ wrqu.data.length =
+ sprintf(buf, "MLME-MICHAELMICFAILURE."
+ "indication(keyid=%d %scast addr=%pM)",
+ key_id, key_type == NL80211_KEYTYPE_GROUP
+ ? "broad" : "uni", addr);
wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
kfree(buf);
}
@@ -325,28 +325,136 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
p1[i] &= p2[i];
}
-/* Note: caller must cfg80211_put_bss() regardless of result */
-int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_assoc_request *req)
+static int
+cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a,
+ const struct ieee80211_multi_link_elem *mle_b,
+ struct netlink_ext_ack *extack)
{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err, i, j;
+ const struct ieee80211_mle_basic_common_info *common_a, *common_b;
- lockdep_assert_wiphy(wdev->wiphy);
+ common_a = (const void *)mle_a->variable;
+ common_b = (const void *)mle_b->variable;
+
+ if (memcmp(common_a->mld_mac_addr, common_b->mld_mac_addr, ETH_ALEN)) {
+ NL_SET_ERR_MSG(extack, "AP MLD address mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) !=
+ ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) !=
+ ieee80211_mle_get_eml_cap((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link EML capabilities mismatch");
+ return -EINVAL;
+ }
+
+ if (ieee80211_mle_get_mld_capa_op((const u8 *)mle_a) !=
+ ieee80211_mle_get_mld_capa_op((const u8 *)mle_b)) {
+ NL_SET_ERR_MSG(extack, "link MLD capabilities/ops mismatch");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cfg80211_mlme_check_mlo(struct net_device *dev,
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack)
+{
+ const struct ieee80211_multi_link_elem *mles[ARRAY_SIZE(req->links)] = {};
+ int i;
+
+ if (req->link_id < 0)
+ return 0;
+
+ if (!req->links[req->link_id].bss) {
+ NL_SET_ERR_MSG(extack, "no BSS for assoc link");
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < ARRAY_SIZE(req->links); i++) {
+ const struct cfg80211_bss_ies *ies;
+ const struct element *ml;
- for (i = 1; i < ARRAY_SIZE(req->links); i++) {
if (!req->links[i].bss)
continue;
- for (j = 0; j < i; j++) {
- if (req->links[i].bss == req->links[j].bss)
- return -EINVAL;
+
+ if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr)) {
+ NL_SET_ERR_MSG(extack, "BSSID must not be our address");
+ req->links[i].error = -EINVAL;
+ goto error;
}
- if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr))
- return -EINVAL;
+ ies = rcu_dereference(req->links[i].bss->ies);
+ ml = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
+ ies->data, ies->len);
+ if (!ml) {
+ NL_SET_ERR_MSG(extack, "MLO BSS w/o ML element");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+
+ if (!ieee80211_mle_type_ok(ml->data + 1,
+ IEEE80211_ML_CONTROL_TYPE_BASIC,
+ ml->datalen - 1)) {
+ NL_SET_ERR_MSG(extack, "BSS with invalid ML element");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+
+ mles[i] = (const void *)(ml->data + 1);
+
+ if (ieee80211_mle_get_link_id((const u8 *)mles[i]) != i) {
+ NL_SET_ERR_MSG(extack, "link ID mismatch");
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
+ }
+
+ if (WARN_ON(!mles[req->link_id]))
+ goto error;
+
+ for (i = 0; i < ARRAY_SIZE(req->links); i++) {
+ if (i == req->link_id || !req->links[i].bss)
+ continue;
+
+ if (WARN_ON(!mles[i]))
+ goto error;
+
+ if (cfg80211_mlme_check_mlo_compat(mles[req->link_id], mles[i],
+ extack)) {
+ req->links[i].error = -EINVAL;
+ goto error;
+ }
}
+ rcu_read_unlock();
+ return 0;
+error:
+ rcu_read_unlock();
+ return -EINVAL;
+}
+
+/* Note: caller must cfg80211_put_bss() regardless of result */
+int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_assoc_request *req,
+ struct netlink_ext_ack *extack)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int err;
+
+ lockdep_assert_wiphy(wdev->wiphy);
+
+ err = cfg80211_mlme_check_mlo(dev, req, extack);
+ if (err)
+ return err;
+
if (wdev->connected &&
(!req->prev_bssid ||
!ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid)))
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bd54a928ba..c2829d673b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if.h>
@@ -468,6 +468,10 @@ static const struct netlink_range_validation nl80211_punct_bitmap_range = {
.max = 0xffff,
};
+static const struct netlink_range_validation q_range = {
+ .max = INT_MAX,
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -581,7 +585,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
[NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
- [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
+ [NL80211_ATTR_WPA_VERSIONS] =
+ NLA_POLICY_RANGE(NLA_U32, 0,
+ NL80211_WPA_VERSION_1 |
+ NL80211_WPA_VERSION_2 |
+ NL80211_WPA_VERSION_3),
[NL80211_ATTR_PID] = { .type = NLA_U32 },
[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
[NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN),
@@ -750,7 +758,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
- [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+ [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range),
[NL80211_ATTR_HE_CAPABILITY] =
NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
NL80211_HE_MAX_CAPABILITY_LEN),
@@ -821,6 +829,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG },
[NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
+ [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -906,22 +915,11 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
};
static const struct nla_policy
-nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
- [NL80211_BAND_2GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_5GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_6GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_60GHZ] = { .type = NLA_S32 },
- [NL80211_BAND_LC] = { .type = NLA_S32 },
-};
-
-static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_SSID_LEN },
[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
- [NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
- NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
};
static const struct nla_policy
@@ -1204,11 +1202,14 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT))
+ if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT))
+ if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR))
goto nla_put_failure;
}
@@ -3224,24 +3225,9 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
wdev->iftype == NL80211_IFTYPE_P2P_GO;
}
-static int nl80211_parse_punct_bitmap(struct cfg80211_registered_device *rdev,
- struct genl_info *info,
- const struct cfg80211_chan_def *chandef,
- u16 *punct_bitmap)
-{
- if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT))
- return -EINVAL;
-
- *punct_bitmap = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]);
- if (!cfg80211_valid_disable_subchannel_bitmap(punct_bitmap, chandef))
- return -EINVAL;
-
- return 0;
-}
-
-int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
- struct genl_info *info,
- struct cfg80211_chan_def *chandef)
+static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ struct genl_info *info, bool monitor,
+ struct cfg80211_chan_def *chandef)
{
struct netlink_ext_ack *extack = info->extack;
struct nlattr **attrs = info->attrs;
@@ -3266,10 +3252,9 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
chandef->freq1_offset = control_freq % 1000;
chandef->center_freq2 = 0;
- /* Primary channel not allowed */
- if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) {
+ if (!chandef->chan) {
NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ],
- "Channel is disabled");
+ "Unknown channel");
return -EINVAL;
}
@@ -3346,13 +3331,27 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
chandef->edmg.channels = 0;
}
+ if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
+ chandef->punctured =
+ nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]);
+
+ if (chandef->punctured &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_PUNCT)) {
+ NL_SET_ERR_MSG(extack,
+ "driver doesn't support puncturing");
+ return -EINVAL;
+ }
+ }
+
if (!cfg80211_chandef_valid(chandef)) {
NL_SET_ERR_MSG(extack, "invalid channel definition");
return -EINVAL;
}
- if (!cfg80211_chandef_usable(&rdev->wiphy, chandef,
- IEEE80211_CHAN_DISABLED)) {
+ if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef,
+ IEEE80211_CHAN_DISABLED,
+ monitor)) {
NL_SET_ERR_MSG(extack, "(extension) channel is disabled");
return -EINVAL;
}
@@ -3367,6 +3366,13 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
return 0;
}
+int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ struct genl_info *info,
+ struct cfg80211_chan_def *chandef)
+{
+ return _nl80211_parse_chandef(rdev, info, false, chandef);
+}
+
static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct genl_info *info,
@@ -3391,7 +3397,9 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
link_id = 0;
}
- result = nl80211_parse_chandef(rdev, info, &chandef);
+ result = _nl80211_parse_chandef(rdev, info,
+ iftype == NL80211_IFTYPE_MONITOR,
+ &chandef);
if (result)
return result;
@@ -3414,6 +3422,33 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
if (chandef.chan != cur_chan)
return -EBUSY;
+ /* only allow this for regular channel widths */
+ switch (wdev->links[link_id].ap.chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
+ break;
+ default:
+ return -EINVAL;
+ }
+
result = rdev_set_ap_chanwidth(rdev, dev, link_id,
&chandef);
if (result)
@@ -3822,6 +3857,10 @@ int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *ch
if (chandef->center_freq2 &&
nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2))
return -ENOBUFS;
+ if (chandef->punctured &&
+ nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, chandef->punctured))
+ return -ENOBUFS;
+
return 0;
}
EXPORT_SYMBOL(nl80211_send_chandef);
@@ -4202,8 +4241,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (netif_running(dev))
return -EBUSY;
- BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
- IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
memcpy(wdev->u.mesh.id,
@@ -4309,8 +4346,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_MESH_POINT:
if (!info->attrs[NL80211_ATTR_MESH_ID])
break;
- BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
- IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
memcpy(wdev->u.mesh.id,
@@ -4450,10 +4485,7 @@ static void get_key_callback(void *c, struct key_params *params)
struct nlattr *key;
struct get_key_cookie *cookie = c;
- if ((params->key &&
- nla_put(cookie->msg, NL80211_ATTR_KEY_DATA,
- params->key_len, params->key)) ||
- (params->seq &&
+ if ((params->seq &&
nla_put(cookie->msg, NL80211_ATTR_KEY_SEQ,
params->seq_len, params->seq)) ||
(params->cipher &&
@@ -4465,10 +4497,7 @@ static void get_key_callback(void *c, struct key_params *params)
if (!key)
goto nla_put_failure;
- if ((params->key &&
- nla_put(cookie->msg, NL80211_KEY_DATA,
- params->key_len, params->key)) ||
- (params->seq &&
+ if ((params->seq &&
nla_put(cookie->msg, NL80211_KEY_SEQ,
params->seq_len, params->seq)) ||
(params->cipher &&
@@ -6069,14 +6098,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
- err = nl80211_parse_punct_bitmap(rdev, info,
- &params->chandef,
- &params->punct_bitmap);
- if (err)
- goto out;
- }
-
if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params->chandef,
wdev->iftype)) {
err = -EINVAL;
@@ -6876,7 +6897,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
- BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);
switch (statype) {
case CFG80211_STA_MESH_PEER_KERNEL:
@@ -6936,6 +6957,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
params->link_sta_params.he_capa ||
params->link_sta_params.eht_capa)
return -EINVAL;
+ if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ return -EINVAL;
}
if (statype != CFG80211_STA_AP_CLIENT &&
@@ -6959,7 +6982,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
BIT(NL80211_STA_FLAG_ASSOCIATED) |
BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
BIT(NL80211_STA_FLAG_WME) |
- BIT(NL80211_STA_FLAG_MFP)))
+ BIT(NL80211_STA_FLAG_MFP) |
+ BIT(NL80211_STA_FLAG_SPP_AMSDU)))
return -EINVAL;
/* but authenticated/associated only if driver handles it */
@@ -7518,7 +7542,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
- BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);
switch (dev->ieee80211_ptr->iftype) {
case NL80211_IFTYPE_AP:
@@ -7542,6 +7566,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.sta_flags_mask & auth_assoc)
return -EINVAL;
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT) &&
+ params.sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
+ return -EINVAL;
+
/* Older userspace, or userspace wanting to be compatible with
* !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth
* and assoc flags in the mask, but assumes the station will be
@@ -7630,14 +7659,16 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
struct station_del_parameters params;
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
memset(&params, 0, sizeof(params));
if (info->attrs[NL80211_ATTR_MAC])
params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]);
- switch (dev->ieee80211_ptr->iftype) {
+ switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MESH_POINT:
@@ -7678,6 +7709,17 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID;
}
+ /* Link ID not expected in case of non-ML operation */
+ if (!wdev->valid_links && link_id != -1)
+ return -EINVAL;
+
+ /* If given, a valid link ID should be passed during MLO */
+ if (wdev->valid_links && link_id >= 0 &&
+ !(wdev->valid_links & BIT(link_id)))
+ return -EINVAL;
+
+ params.link_id = link_id;
+
return rdev_del_station(rdev, dev, &params);
}
@@ -8102,7 +8144,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
is_indoor = true;
}
- return regulatory_hint_indoor(is_indoor, owner_nlportid);
+ regulatory_hint_indoor(is_indoor, owner_nlportid);
+ return 0;
default:
return -EINVAL;
}
@@ -9148,6 +9191,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
struct wiphy *wiphy;
int err, tmp, n_ssids = 0, n_channels, i;
size_t ie_len, size;
+ size_t ssids_offset, ie_offset;
wiphy = &rdev->wiphy;
@@ -9193,21 +9237,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
size = struct_size(request, channels, n_channels);
+ ssids_offset = size;
size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
+ ie_offset = size;
size = size_add(size, ie_len);
request = kzalloc(size, GFP_KERNEL);
if (!request)
return -ENOMEM;
+ request->n_channels = n_channels;
if (n_ssids)
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request + ssids_offset;
request->n_ssids = n_ssids;
- if (ie_len) {
- if (n_ssids)
- request->ie = (void *)(request->ssids + n_ssids);
- else
- request->ie = (void *)(request->channels + n_channels);
- }
+ if (ie_len)
+ request->ie = (void *)request + ie_offset;
i = 0;
if (scan_freqs) {
@@ -9483,41 +9526,6 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
return 0;
}
-static int
-nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy,
- struct cfg80211_match_set *match_sets,
- struct nlattr *tb_band_rssi,
- s32 rssi_thold)
-{
- struct nlattr *attr;
- int i, tmp, ret = 0;
-
- if (!wiphy_ext_feature_isset(wiphy,
- NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) {
- if (tb_band_rssi)
- ret = -EOPNOTSUPP;
- else
- for (i = 0; i < NUM_NL80211_BANDS; i++)
- match_sets->per_band_rssi_thold[i] =
- NL80211_SCAN_RSSI_THOLD_OFF;
- return ret;
- }
-
- for (i = 0; i < NUM_NL80211_BANDS; i++)
- match_sets->per_band_rssi_thold[i] = rssi_thold;
-
- nla_for_each_nested(attr, tb_band_rssi, tmp) {
- enum nl80211_band band = nla_type(attr);
-
- if (band < 0 || band >= NUM_NL80211_BANDS)
- return -EINVAL;
-
- match_sets->per_band_rssi_thold[band] = nla_get_s32(attr);
- }
-
- return 0;
-}
-
static struct cfg80211_sched_scan_request *
nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
struct nlattr **attrs, int max_match_sets)
@@ -9792,15 +9800,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
if (rssi)
request->match_sets[i].rssi_thold =
nla_get_s32(rssi);
-
- /* Parse per band RSSI attribute */
- err = nl80211_parse_sched_scan_per_band_rssi(wiphy,
- &request->match_sets[i],
- tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI],
- request->match_sets[i].rssi_thold);
- if (err)
- goto out_free;
-
i++;
}
@@ -10080,6 +10079,42 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb,
return 0;
}
+static int nl80211_parse_counter_offsets(struct cfg80211_registered_device *rdev,
+ const u8 *data, size_t datalen,
+ int first_count, struct nlattr *attr,
+ const u16 **offsets, unsigned int *n_offsets)
+{
+ int i;
+
+ *n_offsets = 0;
+
+ if (!attr)
+ return 0;
+
+ if (!nla_len(attr) || (nla_len(attr) % sizeof(u16)))
+ return -EINVAL;
+
+ *n_offsets = nla_len(attr) / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (*n_offsets > rdev->wiphy.max_num_csa_counters))
+ return -EINVAL;
+
+ *offsets = nla_data(attr);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < *n_offsets; i++) {
+ u16 offset = (*offsets)[i];
+
+ if (offset >= datalen)
+ return -EINVAL;
+
+ if (first_count != -1 && data[offset] != first_count)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -10091,7 +10126,6 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
int err;
bool need_new_beacon = false;
bool need_handle_dfs_flag = true;
- int len, i;
u32 cs_count;
if (!rdev->ops->channel_switch ||
@@ -10176,72 +10210,23 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
goto free;
}
- len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
- if (!len || (len % sizeof(u16))) {
- err = -EINVAL;
+ err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.tail,
+ params.beacon_csa.tail_len,
+ params.count,
+ csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON],
+ &params.counter_offsets_beacon,
+ &params.n_counter_offsets_beacon);
+ if (err)
goto free;
- }
- params.n_counter_offsets_beacon = len / sizeof(u16);
- if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_beacon >
- rdev->wiphy.max_num_csa_counters)) {
- err = -EINVAL;
+ err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.probe_resp,
+ params.beacon_csa.probe_resp_len,
+ params.count,
+ csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP],
+ &params.counter_offsets_presp,
+ &params.n_counter_offsets_presp);
+ if (err)
goto free;
- }
-
- params.counter_offsets_beacon =
- nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
-
- /* sanity checks - counters should fit and be the same */
- for (i = 0; i < params.n_counter_offsets_beacon; i++) {
- u16 offset = params.counter_offsets_beacon[i];
-
- if (offset >= params.beacon_csa.tail_len) {
- err = -EINVAL;
- goto free;
- }
-
- if (params.beacon_csa.tail[offset] != params.count) {
- err = -EINVAL;
- goto free;
- }
- }
-
- if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
- len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
- if (!len || (len % sizeof(u16))) {
- err = -EINVAL;
- goto free;
- }
-
- params.n_counter_offsets_presp = len / sizeof(u16);
- if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_presp >
- rdev->wiphy.max_num_csa_counters)) {
- err = -EINVAL;
- goto free;
- }
-
- params.counter_offsets_presp =
- nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
-
- /* sanity checks - counters should fit and be the same */
- for (i = 0; i < params.n_counter_offsets_presp; i++) {
- u16 offset = params.counter_offsets_presp[i];
-
- if (offset >= params.beacon_csa.probe_resp_len) {
- err = -EINVAL;
- goto free;
- }
-
- if (params.beacon_csa.probe_resp[offset] !=
- params.count) {
- err = -EINVAL;
- goto free;
- }
- }
- }
skip_beacons:
err = nl80211_parse_chandef(rdev, info, &params.chandef);
@@ -10272,14 +10257,7 @@ skip_beacons:
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
- if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
- err = nl80211_parse_punct_bitmap(rdev, info,
- &params.chandef,
- &params.punct_bitmap);
- if (err)
- goto free;
- }
-
+ params.link_id = link_id;
err = rdev_channel_switch(rdev, dev, &params);
free:
@@ -10652,13 +10630,6 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
return res;
}
-static bool nl80211_valid_wpa_versions(u32 wpa_versions)
-{
- return !(wpa_versions & ~(NL80211_WPA_VERSION_1 |
- NL80211_WPA_VERSION_2 |
- NL80211_WPA_VERSION_3));
-}
-
static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -10884,12 +10855,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
return -EINVAL;
}
- if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) {
+ if (info->attrs[NL80211_ATTR_WPA_VERSIONS])
settings->wpa_versions =
nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]);
- if (!nl80211_valid_wpa_versions(settings->wpa_versions))
- return -EINVAL;
- }
if (info->attrs[NL80211_ATTR_AKM_SUITES]) {
void *data;
@@ -11104,6 +11072,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
sizeof(req.s1g_capa));
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_ASSOC_SPP_AMSDU])) {
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT)) {
+ GENL_SET_ERR_MSG(info, "SPP A-MSDUs not supported");
+ return -EINVAL;
+ }
+ req.flags |= ASSOC_REQ_SPP_AMSDU;
+ }
+
req.link_id = nl80211_link_id_or_invalid(info->attrs);
if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
@@ -11229,7 +11206,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
struct nlattr *link;
int rem = 0;
- err = cfg80211_mlme_assoc(rdev, dev, &req);
+ err = cfg80211_mlme_assoc(rdev, dev, &req,
+ info->extack);
if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
dev->ieee80211_ptr->conn_owner_nlportid =
@@ -12677,23 +12655,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
- if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
- int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
- int i;
-
- if (len % sizeof(u16))
- return -EINVAL;
-
- params.n_csa_offsets = len / sizeof(u16);
- params.csa_offsets =
- nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
-
- /* check that all the offsets fit the frame */
- for (i = 0; i < params.n_csa_offsets; i++) {
- if (params.csa_offsets[i] >= params.len)
- return -EINVAL;
- }
- }
+ err = nl80211_parse_counter_offsets(rdev, NULL, params.len, -1,
+ info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX],
+ &params.csa_offsets,
+ &params.n_csa_offsets);
+ if (err)
+ return err;
if (!params.dont_wait_for_ack) {
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -14092,6 +14059,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
error:
for (i = 0; i < new_coalesce.n_rules; i++) {
tmp_rule = &new_coalesce.rules[i];
+ if (!tmp_rule)
+ continue;
for (j = 0; j < tmp_rule->n_patterns; j++)
kfree(tmp_rule->patterns[j].mask);
kfree(tmp_rule->patterns);
@@ -16091,6 +16060,7 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
params.counter_offset_presp = offset;
}
+ params.link_id = nl80211_link_id(info->attrs);
err = rdev_color_change(rdev, dev, &params);
out:
@@ -16830,6 +16800,10 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_station,
.flags = GENL_UNS_ADMIN_PERM,
+ /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on
+ * whether MAC address is passed or not. If MAC address is
+ * passed, then even during MLO, link ID is not required.
+ */
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
@@ -17489,7 +17463,8 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_color_change,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_SET_FILS_AAD,
@@ -19400,7 +19375,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef,
gfp_t gfp,
enum nl80211_commands notif,
- u8 count, bool quiet, u16 punct_bitmap)
+ u8 count, bool quiet)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct sk_buff *msg;
@@ -19434,9 +19409,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
}
- if (nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, punct_bitmap))
- goto nla_put_failure;
-
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -19449,7 +19421,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id, u16 punct_bitmap)
+ unsigned int link_id)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19458,7 +19430,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap);
+ trace_cfg80211_ch_switch_notify(dev, chandef, link_id);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
@@ -19487,15 +19459,14 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
- NL80211_CMD_CH_SWITCH_NOTIFY, 0, false,
- punct_bitmap);
+ NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet, u16 punct_bitmap)
+ bool quiet)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19504,19 +19475,18 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id,
- punct_bitmap);
+ trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
- count, quiet, punct_bitmap);
+ count, quiet);
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
int cfg80211_bss_color_notify(struct net_device *dev,
enum nl80211_commands cmd, u8 count,
- u64 color_bitmap)
+ u64 color_bitmap, u8 link_id)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19539,6 +19509,10 @@ int cfg80211_bss_color_notify(struct net_device *dev,
if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
+ if (wdev->valid_links &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
+ goto nla_put_failure;
+
if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
goto nla_put_failure;
@@ -19887,6 +19861,11 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS))
goto free_msg;
+ if (wakeup->unprot_deauth_disassoc &&
+ nla_put_flag(msg,
+ NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC))
+ goto free_msg;
+
if (wakeup->packet) {
u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211;
u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN;
@@ -20167,9 +20146,26 @@ int cfg80211_external_auth_request(struct net_device *dev,
if (!hdr)
goto nla_put_failure;
+ /* Some historical mistakes in drivers <-> userspace interface (notably
+ * between drivers and wpa_supplicant) led to a big-endian conversion
+ * being needed on NL80211_ATTR_AKM_SUITES _only_ when its value is
+ * WLAN_AKM_SUITE_SAE. This is now fixed on userspace side, but for the
+ * benefit of older wpa_supplicant versions, send this particular value
+ * in big-endian. Note that newer wpa_supplicant will also detect this
+ * particular value in big endian still, so it all continues to work.
+ */
+ if (params->key_mgmt_suite == WLAN_AKM_SUITE_SAE) {
+ if (nla_put_be32(msg, NL80211_ATTR_AKM_SUITES,
+ cpu_to_be32(WLAN_AKM_SUITE_SAE)))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(msg, NL80211_ATTR_AKM_SUITES,
+ params->key_mgmt_suite))
+ goto nla_put_failure;
+ }
+
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
- nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
params->action) ||
nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index e106dcea39..c569c37da3 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_period = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
out->ftm.burst_period =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+ nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
if (out->ftm.asap && !capa->ftm.asap) {
@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.num_bursts_exp = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
out->ftm.num_bursts_exp =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
if (capa->ftm.max_bursts_exponent >= 0 &&
out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_duration = 15;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
out->ftm.burst_duration =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
out->ftm.ftms_per_burst = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.ftmr_retries = 3;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
out->ftm.ftmr_retries =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
if (out->ftm.request_lci && !capa->ftm.request_lci) {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 43897a5269..755af47b88 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2023 Intel Corporation
+ * Copyright (C) 2018, 2021-2024 Intel Corporation
*/
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -458,6 +458,10 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev,
struct cfg80211_scan_request *request)
{
int ret;
+
+ if (WARN_ON_ONCE(!request->n_ssids && request->ssids))
+ return -EINVAL;
+
trace_rdev_scan(&rdev->wiphy, request);
ret = rdev->ops->scan(&rdev->wiphy, request);
trace_rdev_return_int(&rdev->wiphy, ret);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2741b62691..3cef0021a3 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -57,6 +57,8 @@
#include <linux/verification.h>
#include <linux/moduleparam.h>
#include <linux/firmware.h>
+#include <linux/units.h>
+
#include <net/cfg80211.h>
#include "core.h"
#include "reg.h"
@@ -1289,20 +1291,17 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd)
static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
u32 freq_khz)
{
-#define ONE_GHZ_IN_KHZ 1000000
/*
* From 802.11ad: directional multi-gigabit (DMG):
* Pertaining to operation in a frequency band containing a channel
* with the Channel starting frequency above 45 GHz.
*/
- u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ?
- 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
+ u32 limit = freq_khz > 45 * KHZ_PER_GHZ ? 20 * KHZ_PER_GHZ : 2 * KHZ_PER_GHZ;
if (abs(freq_khz - freq_range->start_freq_khz) <= limit)
return true;
if (abs(freq_khz - freq_range->end_freq_khz) <= limit)
return true;
return false;
-#undef ONE_GHZ_IN_KHZ
}
/*
@@ -1595,10 +1594,10 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_EHT;
if (rd_flags & NL80211_RRF_DFS_CONCURRENT)
channel_flags |= IEEE80211_CHAN_DFS_CONCURRENT;
- if (rd_flags & NL80211_RRF_NO_UHB_VLP_CLIENT)
- channel_flags |= IEEE80211_CHAN_NO_UHB_VLP_CLIENT;
- if (rd_flags & NL80211_RRF_NO_UHB_AFC_CLIENT)
- channel_flags |= IEEE80211_CHAN_NO_UHB_AFC_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_6GHZ_VLP_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_6GHZ_AFC_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT;
if (rd_flags & NL80211_RRF_PSD)
channel_flags |= IEEE80211_CHAN_PSD;
return channel_flags;
@@ -3285,7 +3284,7 @@ int regulatory_hint_user(const char *alpha2,
return 0;
}
-int regulatory_hint_indoor(bool is_indoor, u32 portid)
+void regulatory_hint_indoor(bool is_indoor, u32 portid)
{
spin_lock(&reg_indoor_lock);
@@ -3308,8 +3307,6 @@ int regulatory_hint_indoor(bool is_indoor, u32 portid)
if (!is_indoor)
reg_check_channels();
-
- return 0;
}
void regulatory_netlink_notify(u32 portid)
@@ -3667,9 +3664,9 @@ static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan)
return false;
}
-int regulatory_hint_found_beacon(struct wiphy *wiphy,
- struct ieee80211_channel *beacon_chan,
- gfp_t gfp)
+void regulatory_hint_found_beacon(struct wiphy *wiphy,
+ struct ieee80211_channel *beacon_chan,
+ gfp_t gfp)
{
struct reg_beacon *reg_beacon;
bool processing;
@@ -3678,18 +3675,18 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
beacon_chan->flags & IEEE80211_CHAN_RADAR ||
(beacon_chan->band == NL80211_BAND_2GHZ &&
!freq_is_chan_12_13_14(beacon_chan->center_freq)))
- return 0;
+ return;
spin_lock_bh(&reg_pending_beacons_lock);
processing = pending_reg_beacon(beacon_chan);
spin_unlock_bh(&reg_pending_beacons_lock);
if (processing)
- return 0;
+ return;
reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp);
if (!reg_beacon)
- return -ENOMEM;
+ return;
pr_debug("Found new beacon on frequency: %d.%03d MHz (Ch %d) on %s\n",
beacon_chan->center_freq, beacon_chan->freq_offset,
@@ -3709,8 +3706,6 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
spin_unlock_bh(&reg_pending_beacons_lock);
schedule_work(&reg_work);
-
- return 0;
}
static void print_rd_rules(const struct ieee80211_regdomain *rd)
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a02ef5609f..e1b211c4f7 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -42,7 +42,7 @@ int regulatory_hint_user(const char *alpha2,
* device is operating in an indoor environment.
* @portid: the netlink port ID on which the hint was given.
*/
-int regulatory_hint_indoor(bool is_indoor, u32 portid);
+void regulatory_hint_indoor(bool is_indoor, u32 portid);
/**
* regulatory_netlink_notify - notify on released netlink socket
@@ -82,9 +82,9 @@ bool reg_last_request_cell_base(void);
* on a newly found BSS. If you cannot make use of this feature you can
* set the wiphy->disable_beacon_hints to true.
*/
-int regulatory_hint_found_beacon(struct wiphy *wiphy,
- struct ieee80211_channel *beacon_chan,
- gfp_t gfp);
+void regulatory_hint_found_beacon(struct wiphy *wiphy,
+ struct ieee80211_channel *beacon_chan,
+ gfp_t gfp);
/**
* regulatory_hint_country_ie - hints a country IE as a regulatory domain
@@ -137,13 +137,14 @@ void regulatory_hint_disconnect(void);
* Get a value specifying the U-NII band frequency belongs to.
* U-NII bands are defined by the FCC in C.F.R 47 part 15.
*
- * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
+ * Return: -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
* 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3.
*/
int cfg80211_get_unii(int freq);
/**
* regulatory_indoor_allowed - is indoor operation allowed
+ * Return: %true if indoor operation is allowed, %false otherwise
*/
bool regulatory_indoor_allowed(void);
@@ -173,11 +174,13 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
* reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured
* @wiphy1: wiphy it's dfs_region to be checked against that of wiphy2
* @wiphy2: wiphy it's dfs_region to be checked against that of wiphy1
+ * Return: %true if both wiphys have the same DFS domain, %false otherwise
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
/**
* reg_reload_regdb - reload the regulatory.db firmware file
+ * Return: 0 for success, an error code otherwise
*/
int reg_reload_regdb(void);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index f138f88be9..292b530a6d 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -77,45 +77,6 @@ MODULE_PARM_DESC(bss_entries_limit,
#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ)
-/**
- * struct cfg80211_colocated_ap - colocated AP information
- *
- * @list: linked list to all colocated aPS
- * @bssid: BSSID of the reported AP
- * @ssid: SSID of the reported AP
- * @ssid_len: length of the ssid
- * @center_freq: frequency the reported AP is on
- * @unsolicited_probe: the reported AP is part of an ESS, where all the APs
- * that operate in the same channel as the reported AP and that might be
- * detected by a STA receiving this frame, are transmitting unsolicited
- * Probe Response frames every 20 TUs
- * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP
- * @same_ssid: the reported AP has the same SSID as the reporting AP
- * @multi_bss: the reported AP is part of a multiple BSSID set
- * @transmitted_bssid: the reported AP is the transmitting BSSID
- * @colocated_ess: all the APs that share the same ESS as the reported AP are
- * colocated and can be discovered via legacy bands.
- * @short_ssid_valid: short_ssid is valid and can be used
- * @short_ssid: the short SSID for this SSID
- * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP
- */
-struct cfg80211_colocated_ap {
- struct list_head list;
- u8 bssid[ETH_ALEN];
- u8 ssid[IEEE80211_MAX_SSID_LEN];
- size_t ssid_len;
- u32 short_ssid;
- u32 center_freq;
- u8 unsolicited_probe:1,
- oct_recommended:1,
- same_ssid:1,
- multi_bss:1,
- transmitted_bssid:1,
- colocated_ess:1,
- short_ssid_valid:1;
- s8 psd_20;
-};
-
static void bss_free(struct cfg80211_internal_bss *bss)
{
struct cfg80211_bss_ies *ies;
@@ -566,7 +527,8 @@ static int cfg80211_calc_short_ssid(const struct cfg80211_bss_ies *ies,
return 0;
}
-static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
+VISIBLE_IF_CFG80211_KUNIT void
+cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
{
struct cfg80211_colocated_ap *ap, *tmp_ap;
@@ -575,6 +537,7 @@ static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
kfree(ap);
}
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_free_coloc_ap_list);
static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
const u8 *pos, u8 length,
@@ -648,104 +611,140 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
return 0;
}
-static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
- struct list_head *list)
+bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len,
+ enum cfg80211_rnr_iter_ret
+ (*iter)(void *data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len),
+ void *iter_data)
{
- struct ieee80211_neighbor_ap_info *ap_info;
- const struct element *elem, *ssid_elem;
+ const struct element *rnr;
const u8 *pos, *end;
- u32 s_ssid_tmp;
- int n_coloc = 0, ret;
- LIST_HEAD(ap_list);
- ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
- if (ret)
- return 0;
+ for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
+ elems, elems_len) {
+ const struct ieee80211_neighbor_ap_info *info;
- for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
- ies->data, ies->len) {
- pos = elem->data;
- end = elem->data + elem->datalen;
+ pos = rnr->data;
+ end = rnr->data + rnr->datalen;
/* RNR IE may contain more than one NEIGHBOR_AP_INFO */
- while (pos + sizeof(*ap_info) <= end) {
- enum nl80211_band band;
- int freq;
+ while (sizeof(*info) <= end - pos) {
u8 length, i, count;
+ u8 type;
- ap_info = (void *)pos;
- count = u8_get_bits(ap_info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1;
- length = ap_info->tbtt_info_len;
+ info = (void *)pos;
+ count = u8_get_bits(info->tbtt_info_hdr,
+ IEEE80211_AP_INFO_TBTT_HDR_COUNT) +
+ 1;
+ length = info->tbtt_info_len;
- pos += sizeof(*ap_info);
+ pos += sizeof(*info);
- if (!ieee80211_operating_class_to_band(ap_info->op_class,
- &band))
- break;
+ if (count * length > end - pos)
+ return false;
- freq = ieee80211_channel_to_frequency(ap_info->channel,
- band);
+ type = u8_get_bits(info->tbtt_info_hdr,
+ IEEE80211_AP_INFO_TBTT_HDR_TYPE);
- if (end - pos < count * length)
- break;
+ for (i = 0; i < count; i++) {
+ switch (iter(iter_data, type, info,
+ pos, length)) {
+ case RNR_ITER_CONTINUE:
+ break;
+ case RNR_ITER_BREAK:
+ return true;
+ case RNR_ITER_ERROR:
+ return false;
+ }
- if (u8_get_bits(ap_info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_TYPE) !=
- IEEE80211_TBTT_INFO_TYPE_TBTT) {
- pos += count * length;
- continue;
+ pos += length;
}
+ }
- /* TBTT info must include bss param + BSSID +
- * (short SSID or same_ssid bit to be set).
- * ignore other options, and move to the
- * next AP info
- */
- if (band != NL80211_BAND_6GHZ ||
- !(length == offsetofend(struct ieee80211_tbtt_info_7_8_9,
- bss_params) ||
- length == sizeof(struct ieee80211_tbtt_info_7_8_9) ||
- length >= offsetofend(struct ieee80211_tbtt_info_ge_11,
- bss_params))) {
- pos += count * length;
- continue;
- }
+ if (pos != end)
+ return false;
+ }
- for (i = 0; i < count; i++) {
- struct cfg80211_colocated_ap *entry;
+ return true;
+}
+EXPORT_SYMBOL_GPL(cfg80211_iter_rnr);
+
+struct colocated_ap_data {
+ const struct element *ssid_elem;
+ struct list_head ap_list;
+ u32 s_ssid_tmp;
+ int n_coloc;
+};
- entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN,
- GFP_ATOMIC);
+static enum cfg80211_rnr_iter_ret
+cfg80211_parse_colocated_ap_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ struct colocated_ap_data *data = _data;
+ struct cfg80211_colocated_ap *entry;
+ enum nl80211_band band;
- if (!entry)
- goto error;
+ if (type != IEEE80211_TBTT_INFO_TYPE_TBTT)
+ return RNR_ITER_CONTINUE;
- entry->center_freq = freq;
+ if (!ieee80211_operating_class_to_band(info->op_class, &band))
+ return RNR_ITER_CONTINUE;
- if (!cfg80211_parse_ap_info(entry, pos, length,
- ssid_elem,
- s_ssid_tmp)) {
- n_coloc++;
- list_add_tail(&entry->list, &ap_list);
- } else {
- kfree(entry);
- }
+ /* TBTT info must include bss param + BSSID + (short SSID or
+ * same_ssid bit to be set). Ignore other options, and move to
+ * the next AP info
+ */
+ if (band != NL80211_BAND_6GHZ ||
+ !(tbtt_info_len == offsetofend(struct ieee80211_tbtt_info_7_8_9,
+ bss_params) ||
+ tbtt_info_len == sizeof(struct ieee80211_tbtt_info_7_8_9) ||
+ tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11,
+ bss_params)))
+ return RNR_ITER_CONTINUE;
+
+ entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, GFP_ATOMIC);
+ if (!entry)
+ return RNR_ITER_ERROR;
+
+ entry->center_freq =
+ ieee80211_channel_to_frequency(info->channel, band);
+
+ if (!cfg80211_parse_ap_info(entry, tbtt_info, tbtt_info_len,
+ data->ssid_elem, data->s_ssid_tmp)) {
+ data->n_coloc++;
+ list_add_tail(&entry->list, &data->ap_list);
+ } else {
+ kfree(entry);
+ }
- pos += length;
- }
- }
+ return RNR_ITER_CONTINUE;
+}
-error:
- if (pos != end) {
- cfg80211_free_coloc_ap_list(&ap_list);
- return 0;
- }
+VISIBLE_IF_CFG80211_KUNIT int
+cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+ struct list_head *list)
+{
+ struct colocated_ap_data data = {};
+ int ret;
+
+ INIT_LIST_HEAD(&data.ap_list);
+
+ ret = cfg80211_calc_short_ssid(ies, &data.ssid_elem, &data.s_ssid_tmp);
+ if (ret)
+ return 0;
+
+ if (!cfg80211_iter_rnr(ies->data, ies->len,
+ cfg80211_parse_colocated_ap_iter, &data)) {
+ cfg80211_free_coloc_ap_list(&data.ap_list);
+ return 0;
}
- list_splice_tail(&ap_list, list);
- return n_coloc;
+ list_splice_tail(&data.ap_list, list);
+ return data.n_coloc;
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap);
static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
struct ieee80211_channel *chan,
@@ -813,6 +812,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
LIST_HEAD(coloc_ap_list);
bool need_scan_psc = true;
const struct ieee80211_sband_iftype_data *iftd;
+ size_t size, offs_ssids, offs_6ghz_params, offs_ies;
rdev_req->scan_6ghz = true;
@@ -878,10 +878,15 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
spin_unlock_bh(&rdev->bss_lock);
}
- request = kzalloc(struct_size(request, channels, n_channels) +
- sizeof(*request->scan_6ghz_params) * count +
- sizeof(*request->ssids) * rdev_req->n_ssids,
- GFP_KERNEL);
+ size = struct_size(request, channels, n_channels);
+ offs_ssids = size;
+ size += sizeof(*request->ssids) * rdev_req->n_ssids;
+ offs_6ghz_params = size;
+ size += sizeof(*request->scan_6ghz_params) * count;
+ offs_ies = size;
+ size += rdev_req->ie_len;
+
+ request = kzalloc(size, GFP_KERNEL);
if (!request) {
cfg80211_free_coloc_ap_list(&coloc_ap_list);
return -ENOMEM;
@@ -889,8 +894,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
*request = *rdev_req;
request->n_channels = 0;
- request->scan_6ghz_params =
- (void *)&request->channels[n_channels];
+ request->n_6ghz_params = 0;
+ if (rdev_req->n_ssids) {
+ /*
+ * Add the ssids from the parent scan request to the new
+ * scan request, so the driver would be able to use them
+ * in its probe requests to discover hidden APs on PSC
+ * channels.
+ */
+ request->ssids = (void *)request + offs_ssids;
+ memcpy(request->ssids, rdev_req->ssids,
+ sizeof(*request->ssids) * request->n_ssids);
+ }
+ request->scan_6ghz_params = (void *)request + offs_6ghz_params;
+
+ if (rdev_req->ie_len) {
+ void *ie = (void *)request + offs_ies;
+
+ memcpy(ie, rdev_req->ie, rdev_req->ie_len);
+ request->ie = ie;
+ }
/*
* PSC channels should not be scanned in case of direct scan with 1 SSID
@@ -979,17 +1002,8 @@ skip:
if (request->n_channels) {
struct cfg80211_scan_request *old = rdev->int_scan_req;
- rdev->int_scan_req = request;
- /*
- * Add the ssids from the parent scan request to the new scan
- * request, so the driver would be able to use them in its
- * probe requests to discover hidden APs on PSC channels.
- */
- request->ssids = (void *)&request->channels[request->n_channels];
- request->n_ssids = rdev_req->n_ssids;
- memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
- request->n_ssids);
+ rdev->int_scan_req = request;
/*
* If this scan follows a previous scan, save the scan start
@@ -2122,6 +2136,40 @@ struct cfg80211_inform_single_bss_data {
u64 cannot_use_reasons;
};
+static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen,
+ const u32 flags)
+{
+ const struct element *tmp;
+ struct ieee80211_he_operation *he_oper;
+
+ tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
+ if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 &&
+ tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
+ he_oper = (void *)&tmp->data[1];
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper)
+ return false;
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
+ return true;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT);
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT);
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *
cfg80211_inform_single_bss_data(struct wiphy *wiphy,
@@ -2154,6 +2202,14 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
if (!channel)
return NULL;
+ if (channel->band == NL80211_BAND_6GHZ &&
+ !cfg80211_6ghz_power_type_valid(data->ie, data->ielen,
+ channel->flags)) {
+ data->use_for = 0;
+ data->cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH;
+ }
+
memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN);
tmp.pub.channel = channel;
if (data->bss_source != BSS_SOURCE_STA_PROFILE)
@@ -2165,15 +2221,22 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.ts_boottime = drv_data->boottime_ns;
tmp.parent_tsf = drv_data->parent_tsf;
ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid);
+ tmp.pub.chains = drv_data->chains;
+ memcpy(tmp.pub.chain_signal, drv_data->chain_signal,
+ IEEE80211_MAX_CHAINS);
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
- if (data->bss_source != BSS_SOURCE_DIRECT) {
+ switch (data->bss_source) {
+ case BSS_SOURCE_MBSSID:
tmp.pub.transmitted_bss = data->source_bss;
+ fallthrough;
+ case BSS_SOURCE_STA_PROFILE:
ts = bss_from_pub(data->source_bss)->ts;
tmp.pub.bssid_index = data->bssid_index;
tmp.pub.max_bssid_indicator = data->max_bssid_indicator;
- } else {
+ break;
+ case BSS_SOURCE_DIRECT:
ts = jiffies;
if (channel->band == NL80211_BAND_60GHZ) {
@@ -2188,6 +2251,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
regulatory_hint_found_beacon(wiphy, channel,
gfp);
}
+ break;
}
/*
@@ -2208,6 +2272,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
switch (data->ftype) {
case CFG80211_BSS_FTYPE_BEACON:
+ case CFG80211_BSS_FTYPE_S1G_BEACON:
ies->from_beacon = true;
fallthrough;
case CFG80211_BSS_FTYPE_UNKNOWN:
@@ -2403,7 +2468,8 @@ cfg80211_parse_mbssid_data(struct wiphy *wiphy,
profile, profile_len);
if (!mbssid_index_ie || mbssid_index_ie[1] < 1 ||
mbssid_index_ie[2] == 0 ||
- mbssid_index_ie[2] > 46) {
+ mbssid_index_ie[2] > 46 ||
+ mbssid_index_ie[2] >= (1 << elem->data[0])) {
/* No valid Multiple BSSID-Index element */
continue;
}
@@ -2464,16 +2530,22 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
if (elem->id == WLAN_EID_EXTENSION) {
copied = elem->datalen - 1;
- if (copied > data_len)
- return -ENOSPC;
- memmove(data, elem->data + 1, copied);
+ if (data) {
+ if (copied > data_len)
+ return -ENOSPC;
+
+ memmove(data, elem->data + 1, copied);
+ }
} else {
copied = elem->datalen;
- if (copied > data_len)
- return -ENOSPC;
- memmove(data, elem->data, copied);
+ if (data) {
+ if (copied > data_len)
+ return -ENOSPC;
+
+ memmove(data, elem->data, copied);
+ }
}
/* Fragmented elements must have 255 bytes */
@@ -2492,10 +2564,13 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
elem_datalen = elem->datalen;
- if (copied + elem_datalen > data_len)
- return -ENOSPC;
+ if (data) {
+ if (copied + elem_datalen > data_len)
+ return -ENOSPC;
+
+ memmove(data + copied, elem->data, elem_datalen);
+ }
- memmove(data + copied, elem->data, elem_datalen);
copied += elem_datalen;
/* Only the last fragment may be short */
@@ -2601,79 +2676,80 @@ error:
return NULL;
}
-static u8
-cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
- const struct ieee80211_neighbor_ap_info **ap_info,
- u8 *param_ch_count)
-{
- const struct ieee80211_neighbor_ap_info *info;
- const struct element *rnr;
- const u8 *pos, *end;
-
- for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT, ie, ielen) {
- pos = rnr->data;
- end = rnr->data + rnr->datalen;
-
- /* RNR IE may contain more than one NEIGHBOR_AP_INFO */
- while (sizeof(*info) <= end - pos) {
- const struct ieee80211_rnr_mld_params *mld_params;
- u16 params;
- u8 length, i, count, mld_params_offset;
- u8 type, lid;
- u32 use_for;
-
- info = (void *)pos;
- count = u8_get_bits(info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1;
- length = info->tbtt_info_len;
+struct tbtt_info_iter_data {
+ const struct ieee80211_neighbor_ap_info *ap_info;
+ u8 param_ch_count;
+ u32 use_for;
+ u8 mld_id, link_id;
+ bool non_tx;
+};
- pos += sizeof(*info);
+static enum cfg80211_rnr_iter_ret
+cfg802121_mld_ap_rnr_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ const struct ieee80211_rnr_mld_params *mld_params;
+ struct tbtt_info_iter_data *data = _data;
+ u8 link_id;
+ bool non_tx = false;
+
+ if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
+ tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11,
+ mld_params)) {
+ const struct ieee80211_tbtt_info_ge_11 *tbtt_info_ge_11 =
+ (void *)tbtt_info;
+
+ non_tx = (tbtt_info_ge_11->bss_params &
+ (IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID |
+ IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID)) ==
+ IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID;
+ mld_params = &tbtt_info_ge_11->mld_params;
+ } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
+ tbtt_info_len >= sizeof(struct ieee80211_rnr_mld_params))
+ mld_params = (void *)tbtt_info;
+ else
+ return RNR_ITER_CONTINUE;
- if (count * length > end - pos)
- return 0;
+ link_id = le16_get_bits(mld_params->params,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID);
- type = u8_get_bits(info->tbtt_info_hdr,
- IEEE80211_AP_INFO_TBTT_HDR_TYPE);
+ if (data->mld_id != mld_params->mld_id)
+ return RNR_ITER_CONTINUE;
- if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
- length >=
- offsetofend(struct ieee80211_tbtt_info_ge_11,
- mld_params)) {
- mld_params_offset =
- offsetof(struct ieee80211_tbtt_info_ge_11, mld_params);
- use_for = NL80211_BSS_USE_FOR_ALL;
- } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
- length >= sizeof(struct ieee80211_rnr_mld_params)) {
- mld_params_offset = 0;
- use_for = NL80211_BSS_USE_FOR_MLD_LINK;
- } else {
- pos += count * length;
- continue;
- }
+ if (data->link_id != link_id)
+ return RNR_ITER_CONTINUE;
- for (i = 0; i < count; i++) {
- mld_params = (void *)pos + mld_params_offset;
- params = le16_to_cpu(mld_params->params);
+ data->ap_info = info;
+ data->param_ch_count =
+ le16_get_bits(mld_params->params,
+ IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+ data->non_tx = non_tx;
- lid = u16_get_bits(params,
- IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+ if (type == IEEE80211_TBTT_INFO_TYPE_TBTT)
+ data->use_for = NL80211_BSS_USE_FOR_ALL;
+ else
+ data->use_for = NL80211_BSS_USE_FOR_MLD_LINK;
+ return RNR_ITER_BREAK;
+}
- if (mld_id == mld_params->mld_id &&
- link_id == lid) {
- *ap_info = info;
- *param_ch_count =
- le16_get_bits(mld_params->params,
- IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+static u8
+cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
+ const struct ieee80211_neighbor_ap_info **ap_info,
+ u8 *param_ch_count, bool *non_tx)
+{
+ struct tbtt_info_iter_data data = {
+ .mld_id = mld_id,
+ .link_id = link_id,
+ };
- return use_for;
- }
+ cfg80211_iter_rnr(ie, ielen, cfg802121_mld_ap_rnr_iter, &data);
- pos += length;
- }
- }
- }
+ *ap_info = data.ap_info;
+ *param_ch_count = data.param_ch_count;
+ *non_tx = data.non_tx;
- return 0;
+ return data.use_for;
}
static struct element *
@@ -2795,17 +2871,16 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
struct cfg80211_bss *bss;
u8 mld_id, reporter_link_id, bss_change_count;
u16 seen_links = 0;
- const u8 *pos;
u8 i;
- if (!ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
+ if (!ieee80211_mle_type_ok(elem->data + 1,
+ IEEE80211_ML_CONTROL_TYPE_BASIC,
+ elem->datalen - 1))
return;
- ml_elem = (void *)elem->data + 1;
+ ml_elem = (void *)(elem->data + 1);
control = le16_to_cpu(ml_elem->control);
- if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC)
- return;
+ ml_common_len = ml_elem->variable[0];
/* Must be present when transmitted by an AP (in a probe response) */
if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) ||
@@ -2813,24 +2888,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
return;
- ml_common_len = ml_elem->variable[0];
-
- /* length + MLD MAC address */
- pos = ml_elem->variable + 1 + 6;
-
- reporter_link_id = pos[0];
- pos += 1;
-
- bss_change_count = pos[0];
- pos += 1;
-
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY))
- pos += 2;
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_EML_CAPA))
- pos += 2;
-
- /* MLD capabilities and operations */
- pos += 2;
+ reporter_link_id = ieee80211_mle_get_link_id(elem->data + 1);
+ bss_change_count = ieee80211_mle_get_bss_param_ch_cnt(elem->data + 1);
/*
* The MLD ID of the reporting AP is always zero. It is set if the AP
@@ -2838,15 +2897,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
* relating to a nontransmitted BSS (matching the Multi-BSSID Index,
* Draft P802.11be_D3.2, 35.3.4.2)
*/
- if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) {
- mld_id = *pos;
- pos += 1;
- } else {
- mld_id = 0;
- }
-
- /* Extended MLD capabilities and operations */
- pos += 2;
+ mld_id = ieee80211_mle_get_mld_id(elem->data + 1);
/* Fully defrag the ML element for sta information/profile iteration */
mle = cfg80211_defrag_mle(elem, tx_data->ie, tx_data->ielen, gfp);
@@ -2876,6 +2927,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
ssize_t profile_len;
u8 param_ch_count;
u8 link_id, use_for;
+ bool non_tx;
if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i],
mle->sta_prof_len[i]))
@@ -2921,10 +2973,24 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
tx_data->ielen,
mld_id, link_id,
&ap_info,
- &param_ch_count);
+ &param_ch_count,
+ &non_tx);
if (!use_for)
continue;
+ /*
+ * As of 802.11be_D5.0, the specification does not give us any
+ * way of discovering both the MaxBSSID and the Multiple-BSSID
+ * Index. It does seem like the Multiple-BSSID Index element
+ * may be provided, but section 9.4.2.45 explicitly forbids
+ * including a Multiple-BSSID Element (in this case without any
+ * subelements).
+ * Without both pieces of information we cannot calculate the
+ * reference BSSID, so simply ignore the BSS.
+ */
+ if (non_tx)
+ continue;
+
/* We could sanity check the BSSID is included */
if (!ieee80211_operating_class_to_band(ap_info->op_class,
@@ -3052,6 +3118,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
if (!res)
return NULL;
+ /* don't do any further MBSSID/ML handling for S1G */
+ if (ftype == CFG80211_BSS_FTYPE_S1G_BEACON)
+ return res;
+
cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp);
cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp);
@@ -3060,59 +3130,21 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_inform_bss_data);
-static bool cfg80211_uhb_power_type_valid(const u8 *ie,
- size_t ielen,
- const u32 flags)
-{
- const struct element *tmp;
- struct ieee80211_he_operation *he_oper;
-
- tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
- if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
- const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
-
- he_oper = (void *)&tmp->data[1];
- he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
-
- if (!he_6ghz_oper)
- return false;
-
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
- case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
- return true;
- case IEEE80211_6GHZ_CTRL_REG_SP_AP:
- return !(flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT);
- case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
- return !(flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT);
- }
- }
- return false;
-}
-
-/* cfg80211_inform_bss_width_frame helper */
-static struct cfg80211_bss *
-cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
- struct cfg80211_inform_bss *data,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
+struct cfg80211_bss *
+cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
+ struct cfg80211_inform_bss *data,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ gfp_t gfp)
{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct cfg80211_internal_bss tmp = {}, *res;
- struct cfg80211_bss_ies *ies;
- struct ieee80211_channel *channel;
- bool signal_valid;
+ size_t min_hdr_len;
struct ieee80211_ext *ext = NULL;
- u8 *bssid, *variable;
- u16 capability, beacon_int;
- size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt,
- u.probe_resp.variable);
- int bss_type;
-
- BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
- offsetof(struct ieee80211_mgmt, u.beacon.variable));
-
- trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
+ enum cfg80211_bss_frame_type ftype;
+ u16 beacon_interval;
+ const u8 *bssid;
+ u16 capability;
+ const u8 *ie;
+ size_t ielen;
+ u64 tsf;
if (WARN_ON(!mgmt))
return NULL;
@@ -3120,48 +3152,40 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
if (WARN_ON(!wiphy))
return NULL;
- if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
- (data->signal < 0 || data->signal > 100)))
- return NULL;
+ BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
+ offsetof(struct ieee80211_mgmt, u.beacon.variable));
+
+ trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
ext = (void *) mgmt;
- min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon);
if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
min_hdr_len = offsetof(struct ieee80211_ext,
u.s1g_short_beacon.variable);
+ else
+ min_hdr_len = offsetof(struct ieee80211_ext,
+ u.s1g_beacon.variable);
+ } else {
+ /* same for beacons */
+ min_hdr_len = offsetof(struct ieee80211_mgmt,
+ u.probe_resp.variable);
}
if (WARN_ON(len < min_hdr_len))
return NULL;
ielen = len - min_hdr_len;
- variable = mgmt->u.probe_resp.variable;
- if (ext) {
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- variable = ext->u.s1g_short_beacon.variable;
- else
- variable = ext->u.s1g_beacon.variable;
- }
-
- channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan);
- if (!channel)
- return NULL;
-
- if (channel->band == NL80211_BAND_6GHZ &&
- !cfg80211_uhb_power_type_valid(variable, ielen, channel->flags)) {
- data->restrict_use = 1;
- data->use_for = 0;
- data->cannot_use_reasons =
- NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH;
- }
-
+ ie = mgmt->u.probe_resp.variable;
if (ext) {
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
- elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT,
- variable, ielen);
+ if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ ie = ext->u.s1g_short_beacon.variable;
+ else
+ ie = ext->u.s1g_beacon.variable;
+
+ elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen);
if (!elem)
return NULL;
if (elem->datalen < sizeof(*compat))
@@ -3169,112 +3193,26 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
compat = (void *)elem->data;
bssid = ext->u.s1g_beacon.sa;
capability = le16_to_cpu(compat->compat_info);
- beacon_int = le16_to_cpu(compat->beacon_int);
+ beacon_interval = le16_to_cpu(compat->beacon_int);
} else {
bssid = mgmt->bssid;
- beacon_int = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
+ beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
}
- if (channel->band == NL80211_BAND_60GHZ) {
- bss_type = capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
- if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
- bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
- regulatory_hint_found_beacon(wiphy, channel, gfp);
- } else {
- if (capability & WLAN_CAPABILITY_ESS)
- regulatory_hint_found_beacon(wiphy, channel, gfp);
- }
-
- ies = kzalloc(sizeof(*ies) + ielen, gfp);
- if (!ies)
- return NULL;
- ies->len = ielen;
- ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
- ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control) ||
- ieee80211_is_s1g_beacon(mgmt->frame_control);
- memcpy(ies->data, variable, ielen);
+ tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
if (ieee80211_is_probe_resp(mgmt->frame_control))
- rcu_assign_pointer(tmp.pub.proberesp_ies, ies);
+ ftype = CFG80211_BSS_FTYPE_PRESP;
+ else if (ext)
+ ftype = CFG80211_BSS_FTYPE_S1G_BEACON;
else
- rcu_assign_pointer(tmp.pub.beacon_ies, ies);
- rcu_assign_pointer(tmp.pub.ies, ies);
-
- memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
- tmp.pub.beacon_interval = beacon_int;
- tmp.pub.capability = capability;
- tmp.pub.channel = channel;
- tmp.pub.signal = data->signal;
- tmp.ts_boottime = data->boottime_ns;
- tmp.parent_tsf = data->parent_tsf;
- tmp.pub.chains = data->chains;
- memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
- ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
- tmp.pub.use_for = data->restrict_use ?
- data->use_for :
- NL80211_BSS_USE_FOR_ALL;
- tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
-
- signal_valid = data->chan == channel;
- spin_lock_bh(&rdev->bss_lock);
- res = __cfg80211_bss_update(rdev, &tmp, signal_valid, jiffies);
- if (!res)
- goto drop;
-
- rdev_inform_bss(rdev, &res->pub, ies, data->drv_data);
-
- spin_unlock_bh(&rdev->bss_lock);
-
- trace_cfg80211_return_bss(&res->pub);
- /* __cfg80211_bss_update gives us a referenced result */
- return &res->pub;
-
-drop:
- spin_unlock_bh(&rdev->bss_lock);
- return NULL;
-}
-
-struct cfg80211_bss *
-cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
- struct cfg80211_inform_bss *data,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
-{
- struct cfg80211_inform_single_bss_data inform_data = {
- .drv_data = data,
- .ie = mgmt->u.probe_resp.variable,
- .ielen = len - offsetof(struct ieee80211_mgmt,
- u.probe_resp.variable),
- .use_for = data->restrict_use ?
- data->use_for :
- NL80211_BSS_USE_FOR_ALL,
- .cannot_use_reasons = data->cannot_use_reasons,
- };
- struct cfg80211_bss *res;
-
- res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
- len, gfp);
- if (!res)
- return NULL;
-
- /* don't do any further MBSSID/ML handling for S1G */
- if (ieee80211_is_s1g_beacon(mgmt->frame_control))
- return res;
-
- inform_data.ftype = ieee80211_is_beacon(mgmt->frame_control) ?
- CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP;
- memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN);
- inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
- inform_data.beacon_interval =
- le16_to_cpu(mgmt->u.probe_resp.beacon_int);
+ ftype = CFG80211_BSS_FTYPE_BEACON;
- /* process each non-transmitting bss */
- cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp);
-
- cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp);
-
- return res;
+ return cfg80211_inform_bss_data(wiphy, data, ftype,
+ bssid, tsf, capability,
+ beacon_interval, ie, ielen,
+ gfp);
}
EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
@@ -3483,10 +3421,14 @@ int cfg80211_wext_siwscan(struct net_device *dev,
wiphy = &rdev->wiphy;
/* Determine number of channels, needed to allocate creq */
- if (wreq && wreq->num_channels)
+ if (wreq && wreq->num_channels) {
+ /* Passed from userspace so should be checked */
+ if (unlikely(wreq->num_channels > IW_MAX_FREQUENCIES))
+ return -EINVAL;
n_channels = wreq->num_channels;
- else
+ } else {
n_channels = ieee80211_get_num_supported_channels(wiphy);
+ }
creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
n_channels * sizeof(void *),
@@ -3560,8 +3502,10 @@ int cfg80211_wext_siwscan(struct net_device *dev,
memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
creq->ssids[0].ssid_len = wreq->essid_len;
}
- if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
+ if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) {
+ creq->ssids = NULL;
creq->n_ssids = 0;
+ }
}
for (i = 0; i < NUM_NL80211_BANDS; i++)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 195c853273..1cfe673bc5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -209,7 +209,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
if (!req.bss) {
err = -ENOENT;
} else {
- err = cfg80211_mlme_assoc(rdev, wdev->netdev, &req);
+ err = cfg80211_mlme_assoc(rdev, wdev->netdev,
+ &req, NULL);
cfg80211_put_bss(&rdev->wiphy, req.bss);
}
@@ -1044,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev,
cfg80211_hold_bss(
bss_from_pub(params->links[link].bss));
ev->cr.links[link].bss = params->links[link].bss;
+ ev->cr.links[link].status = params->links[link].status;
if (params->links[link].addr) {
ev->cr.links[link].addr = next;
@@ -1352,6 +1354,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
return;
cfg80211_wdev_release_bsses(wdev);
+ wdev->valid_links = 0;
wdev->connected = false;
wdev->u.client.ssid_len = 0;
wdev->conn_owner_nlportid = 0;
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 565511a3f4..62f26618f6 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021, 2023 Intel Corporation
+ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
*/
#include <linux/device.h>
@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev)
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
rdev->suspended = false;
- schedule_work(&rdev->wiphy_work);
+ queue_work(system_unbound_wq, &rdev->wiphy_work);
wiphy_unlock(&rdev->wiphy);
if (ret)
diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile
index 1f6622fcb7..c364e63b50 100644
--- a/net/wireless/tests/Makefile
+++ b/net/wireless/tests/Makefile
@@ -1,3 +1,3 @@
-cfg80211-tests-y += module.o fragmentation.o scan.o util.o
+cfg80211-tests-y += module.o fragmentation.o scan.o util.o chan.o
obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o
diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c
new file mode 100644
index 0000000000..d02258ac2d
--- /dev/null
+++ b/net/wireless/tests/chan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for channel helper functions
+ *
+ * Copyright (C) 2023-2024 Intel Corporation
+ */
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static struct ieee80211_channel chan_6ghz_1 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 5955,
+};
+
+static struct ieee80211_channel chan_6ghz_5 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 5975,
+};
+
+static struct ieee80211_channel chan_6ghz_105 = {
+ .band = NL80211_BAND_6GHZ,
+ .center_freq = 6475,
+};
+
+static const struct chandef_compat_case {
+ const char *desc; /* parameter description; must not contain a newline */
+ /* leave c1 empty to test c2 against an identical copy of itself */
+ struct cfg80211_chan_def c1, c2;
+ /* both argument orders are tested; c2 must be the expected compatible result */
+ bool compat; /* false (default): the two chandefs are expected to be incompatible */
+} chandef_compat_cases[] = {
+ {
+ .desc = "identical non-HT",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20_NOHT,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 20 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 40 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_40,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 80 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_80,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 160 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "identical 320 MHz",
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40 + 80,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "20 MHz in 320 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955 + 10 + 20 + 40 + 80,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "different 20 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_1,
+ .center_freq1 = 5955,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_20,
+ .chan = &chan_6ghz_5,
+ .center_freq1 = 5975,
+ },
+ },
+ {
+ .desc = "different primary 160 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 150,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ },
+ },
+ {
+ /* same as the previous case, but c1 is only 160 MHz wide */
+ .desc = "matching primary 160 MHz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured secondary 160 Mhz",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xf,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured matching",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ .punctured = 0xc0,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xc000,
+ },
+ .compat = true,
+ },
+ {
+ .desc = "matching primary 160 MHz & punctured not matching",
+ .c1 = {
+ .width = NL80211_CHAN_WIDTH_160,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 + 70,
+ .punctured = 0x80,
+ },
+ .c2 = {
+ .width = NL80211_CHAN_WIDTH_320,
+ .chan = &chan_6ghz_105,
+ .center_freq1 = 6475 - 10,
+ .punctured = 0xc000,
+ },
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(chandef_compat, chandef_compat_cases, desc)
+
+static void test_chandef_compat(struct kunit *test)
+{
+ const struct chandef_compat_case *params = test->param_value;
+ const struct cfg80211_chan_def *ret, *expect;
+ struct cfg80211_chan_def c1 = params->c1;
+
+ /* c1 left empty: compare c2 against an identical copy of itself */
+ if (!params->c1.chan)
+ c1 = params->c2;
+
+ KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(&c1), true);
+ KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(&params->c2), true);
+
+ expect = params->compat ? &params->c2 : NULL;
+
+ ret = cfg80211_chandef_compatible(&c1, &params->c2);
+ KUNIT_EXPECT_PTR_EQ(test, ret, expect);
+
+ if (!params->c1.chan)
+ expect = &c1;
+
+ ret = cfg80211_chandef_compatible(&params->c2, &c1);
+ KUNIT_EXPECT_PTR_EQ(test, ret, expect);
+}
+
+static struct kunit_case chandef_compat_test_cases[] = {
+ KUNIT_CASE_PARAM(test_chandef_compat, chandef_compat_gen_params),
+ {}
+};
+
+static struct kunit_suite chandef_compat = {
+ .name = "cfg80211-chandef-compat",
+ .test_cases = chandef_compat_test_cases,
+};
+
+kunit_test_suite(chandef_compat);
diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c
index 49a339ca88..411fae18cd 100644
--- a/net/wireless/tests/fragmentation.c
+++ b/net/wireless/tests/fragmentation.c
@@ -2,7 +2,7 @@
/*
* KUnit tests for element fragmentation
*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <net/cfg80211.h>
@@ -27,7 +27,12 @@ static void defragment_0(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 253);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
KUNIT_EXPECT_EQ(test, ret, 253);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253);
@@ -63,7 +68,12 @@ static void defragment_1(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
/* this means the last fragment was not used */
KUNIT_EXPECT_EQ(test, ret, 254 + 7);
@@ -106,10 +116,15 @@ static void defragment_2(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
WLAN_EID_FRAGMENT);
/* this means the last fragment was not used */
KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255);
KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1);
@@ -134,7 +149,12 @@ static void defragment_at_end(struct kunit *test)
ret = cfg80211_defragment_element((void *)input,
input, sizeof(input),
- data, sizeof(input),
+ NULL, 0,
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, ret,
WLAN_EID_FRAGMENT);
KUNIT_EXPECT_EQ(test, ret, 254 + 7);
KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
index f9ea44aee9..9f458be716 100644
--- a/net/wireless/tests/scan.c
+++ b/net/wireless/tests/scan.c
@@ -407,6 +407,7 @@ static struct inform_bss_ml_sta_case {
int mld_id;
bool sta_prof_vendor_elems;
bool include_oper_class;
+ bool nstr;
} inform_bss_ml_sta_cases[] = {
{
.desc = "zero_mld_id",
@@ -426,6 +427,10 @@ static struct inform_bss_ml_sta_case {
.mld_id = 1,
.sta_prof_vendor_elems = true,
.include_oper_class = true,
+ }, {
+ .desc = "nstr",
+ .mld_id = 0,
+ .nstr = true,
},
};
KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc)
@@ -458,7 +463,7 @@ static void test_inform_bss_ml_sta(struct kunit *test)
struct {
struct ieee80211_neighbor_ap_info info;
struct ieee80211_tbtt_info_ge_11 ap;
- } __packed rnr = {
+ } __packed rnr_normal = {
.info = {
.tbtt_info_hdr = u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT),
.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_ge_11),
@@ -478,6 +483,28 @@ static void test_inform_bss_ml_sta(struct kunit *test)
}
};
struct {
+ struct ieee80211_neighbor_ap_info info;
+ struct ieee80211_rnr_mld_params mld_params;
+ } __packed rnr_nstr = {
+ .info = {
+ .tbtt_info_hdr =
+ u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT) |
+ u8_encode_bits(IEEE80211_TBTT_INFO_TYPE_MLD,
+ IEEE80211_AP_INFO_TBTT_HDR_TYPE),
+ .tbtt_info_len = sizeof(struct ieee80211_rnr_mld_params),
+ .op_class = 81,
+ .channel = 11,
+ },
+ .mld_params = {
+ .mld_id = params->mld_id,
+ .params =
+ le16_encode_bits(link_id,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID),
+ }
+ };
+ size_t rnr_len = params->nstr ? sizeof(rnr_nstr) : sizeof(rnr_normal);
+ void *rnr = params->nstr ? (void *)&rnr_nstr : (void *)&rnr_normal;
+ struct {
__le16 control;
u8 var_len;
u8 mld_mac_addr[ETH_ALEN];
@@ -516,7 +543,7 @@ static void test_inform_bss_ml_sta(struct kunit *test)
u16_encode_bits(link_id,
IEEE80211_MLE_STA_CONTROL_LINK_ID)),
.var_len = sizeof(sta_prof) - 2 - 2,
- .bssid = { *rnr.ap.bssid },
+ .bssid = { *rnr_normal.ap.bssid },
.beacon_int = cpu_to_le16(101),
.tsf_offset = cpu_to_le64(-123ll),
.capabilities = cpu_to_le16(0xdead),
@@ -540,8 +567,8 @@ static void test_inform_bss_ml_sta(struct kunit *test)
}
skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
- skb_put_u8(input, sizeof(rnr));
- skb_put_data(input, &rnr, sizeof(rnr));
+ skb_put_u8(input, rnr_len);
+ skb_put_data(input, rnr, rnr_len);
/* build a multi-link element */
skb_put_u8(input, WLAN_EID_EXTENSION);
@@ -587,9 +614,10 @@ static void test_inform_bss_ml_sta(struct kunit *test)
KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 2);
/* Check link_bss *****************************************************/
- link_bss = cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
- IEEE80211_BSS_TYPE_ANY,
- IEEE80211_PRIVACY_ANY);
+ link_bss = __cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY,
+ 0);
KUNIT_ASSERT_NOT_NULL(test, link_bss);
KUNIT_EXPECT_EQ(test, link_bss->signal, 0);
KUNIT_EXPECT_EQ(test, link_bss->beacon_interval,
@@ -600,6 +628,22 @@ static void test_inform_bss_ml_sta(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, link_bss->channel,
ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2462)));
+ /* Test wiphy does not set WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY */
+ if (params->nstr) {
+ KUNIT_EXPECT_EQ(test, link_bss->use_for, 0);
+ KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons,
+ NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY);
+ KUNIT_EXPECT_NULL(test,
+ cfg80211_get_bss(wiphy, NULL, sta_prof.bssid,
+ NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY));
+ } else {
+ KUNIT_EXPECT_EQ(test, link_bss->use_for,
+ NL80211_BSS_USE_FOR_ALL);
+ KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons, 0);
+ }
+
rcu_read_lock();
ies = rcu_dereference(link_bss->ies);
KUNIT_EXPECT_NOT_NULL(test, ies);
@@ -607,20 +651,20 @@ static void test_inform_bss_ml_sta(struct kunit *test)
/* Resulting length should be:
* SSID (inherited) + RNR (inherited) + vendor element(s) +
* operating class (if requested) +
- * generated RNR (if MLD ID == 0) +
+ * generated RNR (if MLD ID == 0 and not NSTR) +
* MLE common info + MLE header and control
*/
if (params->sta_prof_vendor_elems)
KUNIT_EXPECT_EQ(test, ies->len,
- 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 +
+ 6 + 2 + rnr_len + 2 + 160 + 2 + 165 +
(params->include_oper_class ? 3 : 0) +
- (!params->mld_id ? 22 : 0) +
+ (!params->mld_id && !params->nstr ? 22 : 0) +
mle_basic_common_info.var_len + 5);
else
KUNIT_EXPECT_EQ(test, ies->len,
- 6 + 2 + sizeof(rnr) + 2 + 155 +
+ 6 + 2 + rnr_len + 2 + 155 +
(params->include_oper_class ? 3 : 0) +
- (!params->mld_id ? 22 : 0) +
+ (!params->mld_id && !params->nstr ? 22 : 0) +
mle_basic_common_info.var_len + 5);
rcu_read_unlock();
@@ -628,6 +672,172 @@ static void test_inform_bss_ml_sta(struct kunit *test)
cfg80211_put_bss(wiphy, link_bss);
}
+static struct cfg80211_parse_colocated_ap_case { /* one parameter set for test_cfg80211_parse_colocated_ap() */
+ const char *desc; /* KUnit parameter description; keep unique so failures are attributable */
+ u8 op_class; /* NOTE(review): not read by the test; cases set info.op_class instead */
+ u8 channel; /* NOTE(review): not read by the test; cases set info.channel instead */
+ struct ieee80211_neighbor_ap_info info; /* RNR neighbor AP info header; zero len/op_class/channel get defaults in the test */
+ union { /* TBTT info payload; both layouts share the same bytes */
+ struct ieee80211_tbtt_info_ge_11 tbtt_long;
+ struct ieee80211_tbtt_info_7_8_9 tbtt_short;
+ };
+ bool add_junk; /* append 3 extra bytes ("123") after the TBTT info */
+ bool same_ssid; /* expect the parsed AP to carry the 4-byte "TEST" SSID */
+ bool valid; /* expect exactly one parsed AP (true) or none (false) */
+} cfg80211_parse_colocated_ap_cases[] = {
+ {
+ .desc = "wrong_band",
+ .info.op_class = 81,
+ .info.channel = 11,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "wrong_type",
+ /* IEEE80211_AP_INFO_TBTT_HDR_TYPE is in the least significant bits */
+ .info.tbtt_info_hdr = IEEE80211_TBTT_INFO_TYPE_MLD,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_invalid_len_short",
+ .info.tbtt_info_len = 6,
+ .tbtt_short = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_invalid_len_short_mld",
+ .info.tbtt_info_len = 10,
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = false,
+ },
+ {
+ .desc = "colocated_non_mld",
+ .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9),
+ .tbtt_short = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .same_ssid = true,
+ .valid = true,
+ },
+ {
+ .desc = "colocated_non_mld_invalid_bssid",
+ .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9),
+ .tbtt_short = {
+ .bssid = { 0xff, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP |
+ IEEE80211_RNR_TBTT_PARAMS_SAME_SSID,
+ },
+ .same_ssid = true,
+ .valid = false,
+ },
+ {
+ .desc = "colocated_mld",
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .valid = true,
+ },
+ {
+ .desc = "colocated_mld_junk", /* same entry as "colocated_mld" plus trailing junk bytes */
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ },
+ .add_junk = true,
+ .valid = false,
+ },
+ {
+ .desc = "colocated_disabled_mld",
+ .tbtt_long = {
+ .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
+ .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP,
+ .mld_params.params = cpu_to_le16(IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK),
+ },
+ .valid = false,
+ },
+};
+KUNIT_ARRAY_PARAM_DESC(cfg80211_parse_colocated_ap, cfg80211_parse_colocated_ap_cases, desc)
+
+static void test_cfg80211_parse_colocated_ap(struct kunit *test)
+{ /* Build an SSID + RNR element from the case, run cfg80211_parse_colocated_ap() on it and check the parsed list */
+ const struct cfg80211_parse_colocated_ap_case *params = test->param_value;
+ struct sk_buff *input = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct cfg80211_bss_ies *ies;
+ struct ieee80211_neighbor_ap_info info;
+ LIST_HEAD(coloc_ap_list);
+ int count;
+
+ KUNIT_ASSERT_NOT_NULL(test, input);
+
+ info = params->info;
+
+ /* Reasonable values for a colocated AP */
+ if (!info.tbtt_info_len)
+ info.tbtt_info_len = sizeof(params->tbtt_long);
+ if (!info.op_class)
+ info.op_class = 131;
+ if (!info.channel)
+ info.channel = 33;
+ /* Zero is the correct default for .tbtt_info_hdr (one entry, TBTT type) */
+
+ skb_put_u8(input, WLAN_EID_SSID);
+ skb_put_u8(input, 4); /* SSID length */
+ skb_put_data(input, "TEST", 4);
+
+ skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(input, sizeof(info) + info.tbtt_info_len + (params->add_junk ? 3 : 0)); /* element length */
+ skb_put_data(input, &info, sizeof(info));
+ skb_put_data(input, &params->tbtt_long, info.tbtt_info_len); /* union: tbtt_short shares these bytes */
+
+ if (params->add_junk)
+ skb_put_data(input, "123", 3);
+
+ ies = kunit_kzalloc(test, struct_size(ies, data, input->len), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, ies); /* abort the test rather than dereference a failed allocation */
+ ies->len = input->len;
+ memcpy(ies->data, input->data, input->len);
+
+ count = cfg80211_parse_colocated_ap(ies, &coloc_ap_list);
+ KUNIT_EXPECT_EQ(test, count, params->valid); /* bool used as expected count: 1 or 0 */
+ KUNIT_EXPECT_EQ(test, list_count_nodes(&coloc_ap_list), params->valid);
+
+ if (params->valid && !list_empty(&coloc_ap_list)) {
+ struct cfg80211_colocated_ap *ap;
+
+ ap = list_first_entry(&coloc_ap_list, typeof(*ap), list);
+ if (info.tbtt_info_len <= sizeof(params->tbtt_short))
+ KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_short.bssid, ETH_ALEN);
+ else
+ KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_long.bssid, ETH_ALEN);
+
+ if (params->same_ssid) {
+ KUNIT_EXPECT_EQ(test, ap->ssid_len, 4);
+ KUNIT_EXPECT_MEMEQ(test, ap->ssid, "TEST", 4);
+ } else {
+ KUNIT_EXPECT_EQ(test, ap->ssid_len, 0);
+ }
+ }
+
+ cfg80211_free_coloc_ap_list(&coloc_ap_list);
+}
+
static struct kunit_case gen_new_ie_test_cases[] = {
KUNIT_CASE_PARAM(test_gen_new_ie, gen_new_ie_gen_params),
KUNIT_CASE(test_gen_new_ie_malformed),
@@ -653,3 +863,16 @@ static struct kunit_suite inform_bss = {
};
kunit_test_suite(inform_bss);
+
+static struct kunit_case scan_6ghz_cases[] = { /* cases run by the cfg80211-scan-6ghz suite */
+ KUNIT_CASE_PARAM(test_cfg80211_parse_colocated_ap, /* parameterized test function ... */
+ cfg80211_parse_colocated_ap_gen_params), /* ... and its parameter generator */
+ {} /* empty entry closes the list */
+};
+
+static struct kunit_suite scan_6ghz = { /* suite descriptor handed to kunit_test_suite() below */
+ .name = "cfg80211-scan-6ghz", /* suite name string */
+ .test_cases = scan_6ghz_cases,
+};
+
+kunit_test_suite(scan_6ghz); /* register the suite with KUnit */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 1f374c8a17..87986170d1 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,4 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright(c) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018, 2020-2024 Intel Corporation
+ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg80211
@@ -135,7 +140,8 @@
__field(u32, width) \
__field(u32, center_freq1) \
__field(u32, freq1_offset) \
- __field(u32, center_freq2)
+ __field(u32, center_freq2) \
+ __field(u16, punctured)
#define CHAN_DEF_ASSIGN(chandef) \
do { \
if ((chandef) && (chandef)->chan) { \
@@ -148,6 +154,7 @@
__entry->center_freq1 = (chandef)->center_freq1;\
__entry->freq1_offset = (chandef)->freq1_offset;\
__entry->center_freq2 = (chandef)->center_freq2;\
+ __entry->punctured = (chandef)->punctured; \
} else { \
__entry->band = 0; \
__entry->control_freq = 0; \
@@ -156,14 +163,15 @@
__entry->center_freq1 = 0; \
__entry->freq1_offset = 0; \
__entry->center_freq2 = 0; \
+ __entry->punctured = 0; \
} \
} while (0)
#define CHAN_DEF_PR_FMT \
- "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u"
+ "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u, punct: 0x%x"
#define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \
__entry->freq_offset, __entry->width, \
__entry->center_freq1, __entry->freq1_offset, \
- __entry->center_freq2
+ __entry->center_freq2, __entry->punctured
#define FILS_AAD_ASSIGN(fa) \
do { \
@@ -364,7 +372,7 @@ TRACE_EVENT(rdev_add_virtual_intf,
),
TP_fast_assign(
WIPHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
),
TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d",
@@ -810,8 +818,8 @@ DECLARE_EVENT_CLASS(station_add_change,
params->link_sta_params.opmode_notif_used;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
- ", station flags mask: %u, station flags set: %u, "
- "station modify mask: %u, listen interval: %d, aid: %u, "
+ ", station flags mask: 0x%x, station flags set: 0x%x, "
+ "station modify mask: 0x%x, listen interval: %d, aid: %u, "
"plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
__entry->sta_flags_mask, __entry->sta_flags_set,
@@ -859,6 +867,7 @@ DECLARE_EVENT_CLASS(station_del,
MAC_ENTRY(sta_mac)
__field(u8, subtype)
__field(u16, reason_code)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -866,11 +875,13 @@ DECLARE_EVENT_CLASS(station_del,
MAC_ASSIGN(sta_mac, params->mac);
__entry->subtype = params->subtype;
__entry->reason_code = params->reason_code;
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
- ", subtype: %u, reason_code: %u",
+ ", subtype: %u, reason_code: %u, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
- __entry->subtype, __entry->reason_code)
+ __entry->subtype, __entry->reason_code,
+ __entry->link_id)
);
DEFINE_EVENT(station_del, rdev_del_station,
@@ -1013,7 +1024,7 @@ TRACE_EVENT(rdev_get_mpp,
TRACE_EVENT(rdev_dump_mpp,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
u8 *dst, u8 *mpp),
- TP_ARGS(wiphy, netdev, _idx, mpp, dst),
+ TP_ARGS(wiphy, netdev, _idx, dst, mpp),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1064,7 +1075,7 @@ TRACE_EVENT(rdev_return_int_mpath_info,
),
TP_printk(WIPHY_PR_FMT ", returned %d. mpath info - generation: %d, "
"filled: %u, frame qlen: %u, sn: %u, metric: %u, exptime: %u,"
- " discovery timeout: %u, discovery retries: %u, flags: %u",
+ " discovery timeout: %u, discovery retries: %u, flags: 0x%x",
WIPHY_PR_ARG, __entry->ret, __entry->generation,
__entry->filled, __entry->frame_qlen, __entry->sn,
__entry->metric, __entry->exptime, __entry->discovery_timeout,
@@ -1306,7 +1317,7 @@ TRACE_EVENT(rdev_assoc,
req->fils_nonces, 2 * FILS_NONCE_LEN);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
- ", previous bssid: %pM, use mfp: %s, flags: %u",
+ ", previous bssid: %pM, use mfp: %s, flags: 0x%x",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
__entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp),
__entry->flags)
@@ -1428,7 +1439,7 @@ TRACE_EVENT(rdev_connect,
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, "
- "flags: %u, previous bssid: %pM",
+ "flags: 0x%x, previous bssid: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid,
__entry->auth_type, BOOL_TO_STR(__entry->privacy),
__entry->wpa_versions, __entry->flags, __entry->prev_bssid)
@@ -1747,7 +1758,7 @@ TRACE_EVENT(rdev_return_void_tx_rx,
DECLARE_EVENT_CLASS(tx_rx_evt,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx),
+ TP_ARGS(wiphy, tx, rx),
TP_STRUCT__entry(
WIPHY_ENTRY
__field(u32, tx)
@@ -1764,7 +1775,7 @@ DECLARE_EVENT_CLASS(tx_rx_evt,
DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx)
+ TP_ARGS(wiphy, tx, rx)
);
DECLARE_EVENT_CLASS(wiphy_netdev_id_evt,
@@ -2324,6 +2335,7 @@ TRACE_EVENT(rdev_channel_switch,
__field(u8, count)
__dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
__dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
+ __field(u8, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2341,11 +2353,13 @@ TRACE_EVENT(rdev_channel_switch,
memcpy(__get_dynamic_array(pres_ofs),
params->counter_offsets_presp,
params->n_counter_offsets_presp * sizeof(u16));
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", block_tx: %d, count: %u, radar_required: %d",
+ ", block_tx: %d, count: %u, radar_required: %d, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->block_tx, __entry->count, __entry->radar_required)
+ __entry->block_tx, __entry->count, __entry->radar_required,
+ __entry->link_id)
);
TRACE_EVENT(rdev_set_qos_map,
@@ -2828,6 +2842,7 @@ TRACE_EVENT(rdev_color_change,
__field(u8, count)
__field(u16, bcn_ofs)
__field(u16, pres_ofs)
+ __field(u8, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2835,11 +2850,12 @@ TRACE_EVENT(rdev_color_change,
__entry->count = params->count;
__entry->bcn_ofs = params->counter_offset_beacon;
__entry->pres_ofs = params->counter_offset_presp;
+ __entry->link_id = params->link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
- ", count: %u",
+ ", count: %u, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG,
- __entry->count)
+ __entry->count, __entry->link_id)
);
TRACE_EVENT(rdev_set_radar_background,
@@ -3267,47 +3283,39 @@ TRACE_EVENT(cfg80211_chandef_dfs_required,
TRACE_EVENT(cfg80211_ch_switch_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id,
- u16 punct_bitmap),
- TP_ARGS(netdev, chandef, link_id, punct_bitmap),
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
- __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
- __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
- __entry->punct_bitmap)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_ch_switch_started_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id,
- u16 punct_bitmap),
- TP_ARGS(netdev, chandef, link_id, punct_bitmap),
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
- __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
- __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
- __entry->punct_bitmap)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_radar_event,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b9d15f3693..af6ec71956 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1504,7 +1504,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
5120, /* 0.833333... */
};
u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
- u32 rates_969[3] = { 480388888, 453700000, 408333333 };
+ u32 rates_996[3] = { 480388888, 453700000, 408333333 };
u32 rates_484[3] = { 229411111, 216666666, 195000000 };
u32 rates_242[3] = { 114711111, 108333333, 97500000 };
u32 rates_106[3] = { 40000000, 37777777, 34000000 };
@@ -1524,12 +1524,14 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
return 0;
- if (rate->bw == RATE_INFO_BW_160)
+ if (rate->bw == RATE_INFO_BW_160 ||
+ (rate->bw == RATE_INFO_BW_HE_RU &&
+ rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
result = rates_160M[rate->he_gi];
else if (rate->bw == RATE_INFO_BW_80 ||
(rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
- result = rates_969[rate->he_gi];
+ result = rates_996[rate->he_gi];
else if (rate->bw == RATE_INFO_BW_40 ||
(rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
@@ -2079,6 +2081,82 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
}
EXPORT_SYMBOL(ieee80211_operating_class_to_band);
+bool ieee80211_operating_class_to_chandef(u8 operating_class,
+ struct ieee80211_channel *chan,
+ struct cfg80211_chan_def *chandef)
+{ /* Fill chan, center_freq1 and width of *chandef from an operating class and control channel; returns false when that is not possible */
+ u32 control_freq, offset = 0; /* offset stays 0 when control_freq is below 5180 */
+ enum nl80211_band band;
+
+ if (!ieee80211_operating_class_to_band(operating_class, &band) || /* the class must map to a band ... */
+ !chan || band != chan->band) /* ... and chan must be non-NULL and in that band */
+ return false;
+
+ control_freq = chan->center_freq;
+ chandef->chan = chan; /* NOTE: written even when a later case returns false */
+
+ if (control_freq >= 5955) /* distance from the highest base frequency not above control_freq */
+ offset = control_freq - 5955;
+ else if (control_freq >= 5745)
+ offset = control_freq - 5745;
+ else if (control_freq >= 5180)
+ offset = control_freq - 5180;
+ offset /= 20; /* distance counted in steps of 20 */
+
+ switch (operating_class) {
+ case 81: /* 2 GHz band; 20 MHz; channels 1..13 */
+ case 82: /* 2 GHz band; 20 MHz; channel 14 */
+ case 115: /* 5 GHz band; 20 MHz; channels 36,40,44,48 */
+ case 118: /* 5 GHz band; 20 MHz; channels 52,56,60,64 */
+ case 121: /* 5 GHz band; 20 MHz; channels 100..144 */
+ case 124: /* 5 GHz band; 20 MHz; channels 149,153,157,161 */
+ case 125: /* 5 GHz band; 20 MHz; channels 149..177 */
+ case 131: /* 6 GHz band; 20 MHz; channels 1..233 */
+ case 136: /* 6 GHz band; 20 MHz; channel 2 */
+ chandef->center_freq1 = control_freq; /* 20 MHz: center equals the control frequency */
+ chandef->width = NL80211_CHAN_WIDTH_20;
+ return true;
+ case 83: /* 2 GHz band; 40 MHz; channels 1..9 */
+ case 116: /* 5 GHz band; 40 MHz; channels 36,44 */
+ case 119: /* 5 GHz band; 40 MHz; channels 52,60 */
+ case 122: /* 5 GHz band; 40 MHz; channels 100,108,116,124,132,140 */
+ case 126: /* 5 GHz band; 40 MHz; channels 149,157,165,173 */
+ chandef->center_freq1 = control_freq + 10; /* center lies 10 above the control frequency */
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 84: /* 2 GHz band; 40 MHz; channels 5..13 */
+ case 117: /* 5 GHz band; 40 MHz; channels 40,48 */
+ case 120: /* 5 GHz band; 40 MHz; channels 56,64 */
+ case 123: /* 5 GHz band; 40 MHz; channels 104,112,120,128,136,144 */
+ case 127: /* 5 GHz band; 40 MHz; channels 153,161,169,177 */
+ chandef->center_freq1 = control_freq - 10; /* center lies 10 below the control frequency */
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 132: /* 6 GHz band; 40 MHz; channels 1,5,..,229 */
+ chandef->center_freq1 = control_freq + 10 - (offset & 1) * 20; /* even step: +10, odd step: -10 */
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ return true;
+ case 128: /* 5 GHz band; 80 MHz; channels 36..64,100..144,149..177 */
+ case 133: /* 6 GHz band; 80 MHz; channels 1,5,..,229 */
+ chandef->center_freq1 = control_freq + 30 - (offset & 3) * 20; /* middle of the aligned group of four 20-wide steps */
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ return true;
+ case 129: /* 5 GHz band; 160 MHz; channels 36..64,100..144,149..177 */
+ case 134: /* 6 GHz band; 160 MHz; channels 1,5,..,229 */
+ chandef->center_freq1 = control_freq + 70 - (offset & 7) * 20; /* middle of the aligned group of eight 20-wide steps */
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ return true;
+ case 130: /* 5 GHz band; 80+80 MHz; channels 36..64,100..144,149..177 */
+ case 135: /* 6 GHz band; 80+80 MHz; channels 1,5,..,229 */
+ /* The center_freq2 of 80+80 MHz is unknown */
+ case 137: /* 6 GHz band; 320 MHz; channels 1,5,..,229 */
+ /* 320-1 or 320-2 channelization is unknown */
+ default: /* ambiguous classes above and any unlisted class share this exit */
+ return false;
+ }
+}
+EXPORT_SYMBOL(ieee80211_operating_class_to_chandef);
+
bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
u8 *op_class)
{
@@ -2473,6 +2551,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
{
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
+ int ret;
wdev = dev->ieee80211_ptr;
if (!wdev)
@@ -2484,7 +2563,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
memset(sinfo, 0, sizeof(*sinfo));
- return rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
EXPORT_SYMBOL(cfg80211_get_station);
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 68729aa3a5..dc72302cbd 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -17,8 +17,6 @@ config X25
if you want that) and the lower level data link layer protocol LAPB
(say Y to "LAPB Data Link Driver" below if you want that).
- You can read more about X.25 at <https://www.sangoma.com/tutorials/x25/> and
- <http://docwiki.cisco.com/wiki/X.25>.
Information about X.25 for Linux is contained in the files
<file:Documentation/networking/x25.rst> and
<file:Documentation/networking/x25-iface.rst>.
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d18d51412c..8dda417849 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
return rc;
}
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int x25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index e9802afa43..643f50874d 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -71,7 +71,6 @@ static struct ctl_table x25_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
int __init x25_register_sysctl(void)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index caa340134b..9f76ca591d 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -151,6 +151,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
#define XDP_UMEM_FLAGS_VALID ( \
XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
XDP_UMEM_TX_SW_CSUM | \
+ XDP_UMEM_TX_METADATA_LEN | \
0)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
@@ -204,8 +205,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
- if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
- return -EINVAL;
+ if (mr->flags & XDP_UMEM_TX_METADATA_LEN) {
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+ umem->tx_metadata_len = mr->tx_metadata_len;
+ }
umem->size = size;
umem->headroom = headroom;
@@ -215,7 +219,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
- umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ce60ecd48a..c0e0204b96 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->netdev = netdev;
dma_map->dev = dev;
- dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
@@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
- pool->dma_need_sync = dma_map->dma_need_sync;
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
@@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
__xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
- if (dma_need_sync(dev, dma))
- dma_map->dma_need_sync = true;
dma_map->dma_pages[i] = dma;
}
@@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data_meta = xskb->xdp.data;
xskb->xdp.flags = 0;
- if (pool->dma_need_sync) {
- dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
- pool->frame_len,
- DMA_BIDIRECTIONAL);
- }
+ if (pool->dev)
+ xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
+
return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
@@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
u32 nb_entries1 = 0, nb_entries2;
- if (unlikely(pool->dma_need_sync)) {
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
struct xdp_buff *buff;
/* Slow path */
@@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);
-
-void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
-{
- dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
- xskb->pool->frame_len, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
-
-void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
- size_t size)
-{
- dma_sync_single_range_for_device(pool->dev, dma, 0,
- size, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 9f89553672..09dcea0cbb 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -194,6 +194,7 @@ static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
}
static const struct sock_diag_handler xsk_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_XDP,
.dump = xsk_diag_handler_dump,
};
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d3b3f9e720..fe82e2d073 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
+#include <net/hotdata.h>
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -169,7 +170,8 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&ctx->out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 655fe4ff86..703d4172c7 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -98,6 +98,7 @@ static const int compat_msg_min[XFRM_NR_MSGTYPES] = {
};
static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -129,6 +130,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -277,9 +279,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_SET_MARK_MASK:
case XFRMA_IF_ID:
case XFRMA_MTIMER_THRESH:
+ case XFRMA_SA_DIR:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -434,7 +437,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6346690d5c..2455a76a1c 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -253,6 +253,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
+ if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) ||
+ (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) {
+ NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction");
+ return -EINVAL;
+ }
+
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
/* We don't yet support UDP encapsulation and TFC padding. */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index bd4ce21d76..e95462b982 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,7 @@
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
#include <net/dst_metadata.h>
+#include <net/hotdata.h>
#include "xfrm_inout.h"
@@ -388,11 +389,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ip_hdr(skb)->tot_len = htons(skb->len + ihl);
@@ -403,11 +408,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
@@ -570,6 +579,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ secpath_reset(skb);
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_audit_state_notfound(skb, family, spi, seq);
+ xfrm_state_put(x);
+ x = NULL;
+ goto drop;
+ }
+
skb->mark = xfrm_smark_get(skb->mark, x);
sp->xvec[sp->len++] = x;
@@ -764,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 7d5e920141..5ea15037eb 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_START(xfrm_ifc_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
-BTF_SET8_END(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_END(xfrm_ifc_kfunc_set)
static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 21d50d75c2..e50e4bf993 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -240,7 +240,6 @@ static void xfrmi_dev_free(struct net_device *dev)
struct xfrm_if *xi = netdev_priv(dev);
gro_cells_destroy(&xi->gro_cells);
- free_percpu(dev->tstats);
}
static int xfrmi_create(struct net_device *dev)
@@ -727,7 +726,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->p.link;
+ return READ_ONCE(xi->p.link);
}
static const struct net_device_ops xfrmi_netdev_ops = {
@@ -749,6 +748,7 @@ static void xfrmi_dev_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->needs_free_netdev = true;
dev->priv_destructor = xfrmi_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
eth_broadcast_addr(dev->broadcast);
@@ -765,15 +765,9 @@ static int xfrmi_dev_init(struct net_device *dev)
struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&xi->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
dev->features |= NETIF_F_LLTX;
dev->features |= XFRMI_FEATURES;
@@ -932,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->net;
+ return READ_ONCE(xi->net);
}
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
@@ -957,12 +951,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_exit_list, exit_list) {
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
@@ -973,18 +967,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
for (xip = &xfrmn->xfrmi[i];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
if (xi)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch = xfrmi_exit_batch_net,
+ .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index da6ecc6b3e..56b88ad88d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
@@ -451,6 +452,8 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ xfrm_dev_policy_delete(policy);
+
write_lock_bh(&policy->lock);
policy->walk.dead = 1;
write_unlock_bh(&policy->lock);
@@ -1849,7 +1852,6 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -1890,7 +1892,6 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -2341,7 +2342,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
pol = __xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (pol) {
- xfrm_dev_policy_delete(pol);
xfrm_policy_kill(pol);
return 0;
}
@@ -2488,6 +2488,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
family, policy->if_id);
+ if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
+ xfrm_state_put(x);
+ error = -EINVAL;
+ goto fail;
+ }
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@@ -2597,8 +2603,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
+ path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
path->u.rt6.rt6i_nfheader_len = nfheader_len;
}
}
@@ -3505,6 +3510,130 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
return 0;
}
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (family == AF_INET &&
+ fl4->flowi4_proto == IPPROTO_ICMP &&
+ (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+ fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6) {
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+ (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+ fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+ fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+ const struct flowi *fl, struct flowi *fl1)
+{
+ bool ret = true;
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) :
+ (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+ if (!newskb)
+ return true;
+
+ if (!pskb_pull(newskb, hl))
+ goto out;
+
+ skb_reset_network_header(newskb);
+
+ if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+ goto out;
+
+ fl1->flowi_oif = fl->flowi_oif;
+ fl1->flowi_mark = fl->flowi_mark;
+ fl1->flowi_tos = fl->flowi_tos;
+ nf_nat_decode_session(newskb, fl1, family);
+ ret = false;
+
+out:
+ consume_skb(newskb);
+ return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+ const struct xfrm_selector *sel,
+ const struct flowi *fl)
+{
+ bool ret = false;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return ret;
+
+ ret = xfrm_selector_match(sel, &fl1, family);
+ }
+
+ return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+ const struct flowi *fl, unsigned short family,
+ u32 if_id)
+{
+ struct xfrm_policy *pol = NULL;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+ struct net *net = dev_net(skb->dev);
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return pol;
+
+ pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL;
+ }
+
+ return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+ unsigned short family, struct dst_entry *dst)
+{
+ if (icmp_err_packet(fl, family)) {
+ struct net *net = dev_net(skb->dev);
+ struct dst_entry *dst2;
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return dst;
+
+ dst_hold(dst);
+
+ dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+ if (IS_ERR(dst2))
+ return dst;
+
+ if (dst2->xfrm) {
+ dst_release(dst);
+ dst = dst2;
+ } else {
+ dst_release(dst2);
+ }
+ }
+
+ return dst;
+}
+
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
unsigned short family)
{
@@ -3551,9 +3680,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
+ int ret = 0;
+
if (!xfrm_selector_match(&x->sel, &fl, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
- return 0;
+ ret = 1;
+ if (x->props.flags & XFRM_STATE_ICMP &&
+ xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+ ret = 0;
+ if (ret) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+ return 0;
+ }
}
}
}
@@ -3576,6 +3713,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 0;
}
+ if (!pol && dir == XFRM_POLICY_FWD)
+ pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
if (!pol) {
if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3709,6 +3849,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
res = 0;
dst = NULL;
}
+
+ if (dst && !dst->xfrm)
+ dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
skb_dst_set(skb, dst);
return res;
}
@@ -3765,15 +3909,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
/* Impossible. Such dst must be popped before reaches point of failure. */
}
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst) {
- if (dst->obsolete) {
- dst_release(dst);
- dst = NULL;
- }
- }
- return dst;
+ if (dst->obsolete)
+ sk_dst_reset(sk);
}
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
@@ -4027,10 +4166,7 @@ static int __net_init xfrm_policy_init(struct net *net)
int dir, err;
if (net_eq(net, &init_net)) {
- xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
- sizeof(struct xfrm_dst),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL);
+ xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = rhashtable_init(&xfrm_policy_inexact_table,
&xfrm_pol_inexact_params);
BUG_ON(err);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fee9b5cf37..eeb984be03 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -41,6 +41,8 @@ static const struct snmp_mib xfrm_mib_list[] = {
SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
+ SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
+ SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
SNMP_MIB_SENTINEL
};
@@ -52,6 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+ xfrm_state_update_stats(net);
snmp_get_cpu_field_batch(buff, xfrm_mib_list,
net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index ce56d659c5..bc56c63057 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -778,7 +778,8 @@ int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack)
}
if (x->props.flags & XFRM_STATE_ESN) {
- if (replay_esn->replay_window == 0) {
+ if (replay_esn->replay_window == 0 &&
+ (!x->dir || x->dir == XFRM_SA_DIR_IN)) {
NL_SET_ERR_MSG(extack, "ESN replay window must be > 0");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index bda5327bf3..67b2a399a4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -49,6 +49,7 @@ static struct kmem_cache *xfrm_state_cache __ro_after_init;
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_dev_gc_list);
static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
{
@@ -214,6 +215,7 @@ static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
+static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
@@ -570,7 +572,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
int err = 0;
spin_lock(&x->lock);
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (x->km.state == XFRM_STATE_DEAD)
goto out;
@@ -683,6 +685,41 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
}
EXPORT_SYMBOL(xfrm_state_alloc);
+#ifdef CONFIG_XFRM_OFFLOAD
+void xfrm_dev_state_delete(struct xfrm_state *x)
+{
+ struct xfrm_dev_offload *xso = &x->xso;
+ struct net_device *dev = READ_ONCE(xso->dev);
+
+ if (dev) {
+ dev->xfrmdev_ops->xdo_dev_state_delete(x);
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list);
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_state_delete);
+
+void xfrm_dev_state_free(struct xfrm_state *x)
+{
+ struct xfrm_dev_offload *xso = &x->xso;
+ struct net_device *dev = READ_ONCE(xso->dev);
+
+ if (dev && dev->xfrmdev_ops) {
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ if (!hlist_unhashed(&x->dev_gclist))
+ hlist_del(&x->dev_gclist);
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+
+ if (dev->xfrmdev_ops->xdo_dev_state_free)
+ dev->xfrmdev_ops->xdo_dev_state_free(x);
+ WRITE_ONCE(xso->dev, NULL);
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ netdev_put(dev, &xso->dev_tracker);
+ }
+}
+#endif
+
void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
@@ -848,6 +885,9 @@ EXPORT_SYMBOL(xfrm_state_flush);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
{
+ struct xfrm_state *x;
+ struct hlist_node *tmp;
+ struct xfrm_dev_offload *xso;
int i, err = 0, cnt = 0;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -857,8 +897,6 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
- struct xfrm_state *x;
- struct xfrm_dev_offload *xso;
restart:
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
@@ -868,6 +906,8 @@ restart:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = xfrm_state_delete(x);
+ xfrm_dev_state_free(x);
+
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
xfrm_state_put(x);
@@ -884,6 +924,24 @@ restart:
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+restart_gc:
+ hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) {
+ xso = &x->xso;
+
+ if (xso->dev == dev) {
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+ xfrm_dev_state_free(x);
+ spin_lock_bh(&xfrm_state_dev_gc_lock);
+ goto restart_gc;
+ }
+
+ }
+ spin_unlock_bh(&xfrm_state_dev_gc_lock);
+
+ xfrm_flush_gc();
+
return err;
}
EXPORT_SYMBOL(xfrm_dev_state_flush);
@@ -1273,8 +1331,7 @@ found:
xso->dev = xdo->dev;
xso->real_dev = xdo->real_dev;
xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
- netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
- GFP_ATOMIC);
+ netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC);
error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
if (error) {
xso->dir = 0;
@@ -1292,6 +1349,7 @@ found:
if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
+ x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
@@ -1744,6 +1802,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
+ x->dir = orig->dir;
return x;
@@ -1864,8 +1923,14 @@ int xfrm_state_update(struct xfrm_state *x)
}
if (x1->km.state == XFRM_STATE_ACQ) {
+ if (x->dir && x1->dir != x->dir)
+ goto out;
+
__xfrm_state_insert(x);
x = NULL;
+ } else {
+ if (x1->dir != x->dir)
+ goto out;
}
err = 0;
@@ -1935,7 +2000,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (!READ_ONCE(x->curlft.use_time))
WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
@@ -1957,6 +2022,19 @@ int xfrm_state_check_expire(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_check_expire);
+void xfrm_state_update_stats(struct net *net)
+{
+ struct xfrm_state *x;
+ int i;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ xfrm_dev_state_update_stats(x);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+}
+
struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
index 9e20d4a377..2248eda741 100644
--- a/net/xfrm/xfrm_state_bpf.c
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_KFUNCS_START(xfrm_state_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
-BTF_SET8_END(xfrm_state_kfunc_set)
+BTF_KFUNCS_END(xfrm_state_kfunc_set)
static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 7fdeafc838..ca003e8a03 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -38,7 +38,6 @@ static struct ctl_table xfrm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {}
};
int __net_init xfrm_sysctl_init(struct net *net)
@@ -57,10 +56,8 @@ int __net_init xfrm_sysctl_init(struct net *net)
table[3].data = &net->xfrm.sysctl_acq_expires;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table,
table_size);
@@ -76,7 +73,7 @@ out_kmemdup:
void __net_exit xfrm_sysctl_fini(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->xfrm.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->xfrm.sysctl_hdr);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 912c1189ba..77355422ce 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -130,7 +130,7 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_a
}
static inline int verify_replay(struct xfrm_usersa_info *p,
- struct nlattr **attrs,
+ struct nlattr **attrs, u8 sa_dir,
struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
@@ -168,6 +168,30 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
return -EINVAL;
}
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (rs->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->seq || rs->seq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and seq_hi should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->bmp_len) {
+ NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA");
+ return -EINVAL;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (rs->oseq || rs->oseq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay oseq and oseq_hi should be 0 for input SA");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -176,6 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
struct netlink_ext_ack *extack)
{
int err;
+ u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
err = -EINVAL;
switch (p->family) {
@@ -334,7 +359,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
if ((err = verify_sec_ctx_len(attrs, extack)))
goto out;
- if ((err = verify_replay(p, attrs, extack)))
+ if ((err = verify_replay(p, attrs, sa_dir, extack)))
goto out;
err = -EINVAL;
@@ -358,6 +383,77 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
err = -EINVAL;
goto out;
}
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ NL_SET_ERR_MSG(extack,
+ "MTIMER_THRESH attribute should not be set on output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (p->flags & XFRM_STATE_DECAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_ICMP) {
+ NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_WILDRECV) {
+ NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_REPLAY_VAL]) {
+ struct xfrm_replay_state *replay;
+
+ replay = nla_data(attrs[XFRMA_REPLAY_VAL]);
+
+ if (replay->seq || replay->bitmap) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and bitmap should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (p->flags & XFRM_STATE_NOPMTUDISC) {
+ NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_SA_EXTRA_FLAGS]) {
+ u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
+ if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) {
+ NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ }
}
out:
@@ -734,6 +830,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -902,7 +1001,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
if (x->xso.dev)
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
put_unaligned(x->stats.replay_window, &p->stats.replay_window);
put_unaligned(x->stats.replay, &p->stats.replay);
@@ -1182,8 +1281,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
- if (x->mapping_maxage)
+ if (x->mapping_maxage) {
ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
+ if (ret)
+ goto out;
+ }
+ if (x->dir)
+ ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
out:
return ret;
}
@@ -1618,6 +1722,9 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
goto out;
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
@@ -2348,7 +2455,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
- xfrm_dev_policy_delete(xp);
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
@@ -2402,7 +2508,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur))
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
- + nla_total_size(4); /* XFRM_AE_ETHR */
+ + nla_total_size(4) /* XFRM_AE_ETHR */
+ + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2459,6 +2566,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
goto out_cancel;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ goto out_cancel;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3018,6 +3131,7 @@ EXPORT_SYMBOL_GPL(xfrm_msg_min);
#undef XMSGSIZE
const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -3049,6 +3163,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3097,6 +3212,24 @@ static const struct xfrm_link {
[XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
+static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
+{
+ if (attrs[XFRMA_SA_DIR]) {
+ switch (type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_ALLOCSPI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid attribute SA_DIR");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -3156,6 +3289,12 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto err;
+ if (!link->nla_pol || link->nla_pol == xfrma_policy) {
+ err = xfrm_reject_unused_attr((type + XFRM_MSG_BASE), attrs, extack);
+ if (err < 0)
+ goto err;
+ }
+
if (link->doit == NULL) {
err = -EINVAL;
goto err;
@@ -3189,8 +3328,9 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
static inline unsigned int xfrm_expire_msgsize(void)
{
- return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
- + nla_total_size(sizeof(struct xfrm_mark));
+ return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
+ nla_total_size(sizeof(struct xfrm_mark)) +
+ nla_total_size(sizeof_field(struct xfrm_state, dir));
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3217,6 +3357,12 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
return err;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ return err;
+ }
+
nlmsg_end(skb, nlh);
return 0;
}
@@ -3324,6 +3470,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->mapping_maxage)
l += nla_total_size(sizeof(x->mapping_maxage));
+ if (x->dir)
+ l += nla_total_size(sizeof(x->dir));
+
return l;
}